##   reliability sex  subj agem experiment exp_oldmapping        exp     cost
## 1          NA   m  S1_1 10.5 LUTS.Exp.1     LUTS.Exp.1 LUTS.Exp.1 Barriers
## 2          NA   f S1_10  9.9 LUTS.Exp.1     LUTS.Exp.1 LUTS.Exp.1 Barriers
## 3          NA   m S1_11  9.8 LUTS.Exp.1     LUTS.Exp.1 LUTS.Exp.1 Barriers
## 4          NA   m S1_12  9.9 LUTS.Exp.1     LUTS.Exp.1 LUTS.Exp.1 Barriers
## 5          NA   f S1_13 10.2 LUTS.Exp.1     LUTS.Exp.1 LUTS.Exp.1 Barriers
## 6          NA   f S1_14 10.0 LUTS.Exp.1     LUTS.Exp.1 LUTS.Exp.1 Barriers
##   video_quality audio_quality device highchair HV_side first_test first_fam
## 1            NA            NA               NA    left         HV        HL
## 2            NA            NA               NA    left         LV        LH
## 3            NA            NA               NA    left         HV        HL
## 4            NA            NA               NA   right         LV        LH
## 5            NA            NA               NA   right         HV        LH
## 6            NA            NA               NA   right         LV        HL
##   first_test_deeper_side control_deeper_side control_firstevent control_1
## 1                                                                        
## 2                                                                        
## 3                                                                        
## 4                                                                        
## 5                                                                        
## 6                                                                        
##   control_2 fam1  fam2  fam3  fam4  fam5  fam6 test1 test2 test3 test4 avg_fam
## 1             60    60    60    60 59.12  9.75  32.8  6.38  14.2    60      51
## 2             60    60 50.06 23.11 13.93 14.26 54.04 20.82 11.82 19.46      37
## 3             60    60 48.73    60 29.89 36.65 52.47 18.15 34.18 30.82      49
## 4             60    60    60 37.15 13.48 51.68 33.26 45.95  5.65 14.07      47
## 5             60    60    60 40.67 28.57 10.77  7.22  6.86   6.5  <NA>      43
## 6             60 11.14 56.06    60 32.97 28.77 17.37  9.95 12.22  6.71      41
##   sum_fam testavg_lower testavg_higher lower1 lower2 higher1 higher2
## 1     309          33.2           23.5   6.38     60    32.8    14.2
## 2     221          32.9           20.1  54.04  11.82   20.82   19.46
## 3     295          24.5           43.3  18.15  30.82   52.47   34.18
## 4     282          19.5           30.0  33.26   5.65   45.95   14.07
## 5     260           6.9            6.9   6.86   <NA>    7.22     6.5
## 6     249          14.8            8.3  17.37  12.22    9.95    6.71
##   control_shallow control_deep
## 1                             
## 2                             
## 3                             
## 4                             
## 5                             
## 6
## 
##   f   m 
## 105 101
# function for identifying influential observations, and then returning a new model without them
# INPUTS: model = model name, data = dataset, and subj = column heading for observations
# OUTPUT: model excluding influential subjects
# Identify influential grouping units via Cook's distance and refit without them.
# INPUTS: model = fitted lmer model, data = dataset used to fit the model,
#         subj = name (string) of the grouping column, e.g. "subj"
# OUTPUT: model refit excluding influential subjects, via
#         influence.ME::exclude.influence
exclude.cooks <- function(model, data, subj) {
  # influence.ME returns one Cook's distance PER GROUPING UNIT, not per row.
  cooks <- cooks.distance(influence(model, subj))
  # Standard 4/n cutoff, n = number of grouping units.
  # Fixed: previously used data$subj, which returns NULL (so cutoff = Inf and
  # no exclusions, silently) whenever the grouping column is not literally
  # named "subj".
  cutoff <- 4/length(unique(data[[subj]]))
  # Fixed: influential units must be read off the Cook's-distance vector
  # itself; the old code used which(cooks > cutoff) to index ROWS of `data`,
  # which selects the wrong subjects whenever `data` has more than one row
  # per unit. (Unit labels taken from the rownames influence.ME attaches --
  # TODO confirm against the installed influence.ME version.)
  influential <- rownames(as.matrix(cooks))[as.vector(cooks) > cutoff]
  new.model <- exclude.influence(model, grouping = subj, level = influential)
  return(new.model)
}

# function that computes CIs and returns them in df
# Compute confidence intervals (confint default, 95%) for a fitted model and
# return them as a data frame with columns `lower` and `upper`, one row per
# parameter.
gen.ci <- function(model) {
  ci <- confint(model)
  out <- data.frame(ci)
  colnames(out) <- c("lower", "upper")
  out
}

# function that converts model summary to df
# Convert a model's coefficient table to a data frame with short column names.
# Expects a five-column table (estimate, SE, df, t, p) as produced by
# lmerTest's summary(); rownames are the term names.
gen.m <- function(model) {
  coefs <- coef(summary(model))
  out <- data.frame(coefs)
  names(out) <- c("est", "se", "df", "t", "p")
  out
}

# function that returns column of standardized betas from lmer model
# Extract fixed-effect estimates from an lmer fit as a one-column data frame
# named "beta" (rownames = term names). Intended for models fit on scale()'d
# outcomes, so the estimates are standardized betas.
gen.beta <- function(model) {
  data.frame(beta = fixef(model))
}
# function that returns age info and number of female infants in a dataset

# Summarize participant demographics from long-format data: mean/min/max age
# in months (agem), count of female infants (sex == "f"), and number of unique
# subjects. Uses one row per subject so repeated trials don't inflate counts.
info <- function(longdata) {
  longdata %>%
    group_by(subj) %>%
    slice(1) %>%            # keep each subject's first row only
    ungroup() %>%
    summarize(
      mean = mean(agem),
      min  = min(agem),
      max  = max(agem),
      f    = sum(sex == "f"),
      n    = n_distinct(subj)
    )
}

## Retrieved from : http://www.cookbook-r.com/Graphs/Plotting_means_and_error_bars_(ggplot2)/#error-bars-for-within-subjects-variables
## Gives count, mean, standard deviation, standard error of the mean, and confidence interval (default 95%).
##   data: a data frame.
##   measurevar: the name of a column that contains the variable to be summarized
##   groupvars: a vector containing names of columns that contain grouping variables
##   na.rm: a boolean that indicates whether to ignore NA's
##   conf.interval: the percent range of the confidence interval (default is 95%)
summarySE <- function(data = NULL, measurevar, groupvars = NULL, na.rm = TRUE,
                      conf.interval = .95, .drop = TRUE) {
  # Fixed: use namespace-qualified plyr calls instead of library(plyr).
  # Attaching plyr inside this function masks dplyr's rename/summarize/mutate
  # for the rest of the session, silently breaking the dplyr pipelines used
  # elsewhere in this script.

  # NA-aware length(): if na.rm == TRUE, NAs are not counted
  length2 <- function (x, na.rm = FALSE) {
    if (na.rm) sum(!is.na(x))
    else       length(x)
  }

  # For each group's data frame, return a vector with N, mean, and sd
  datac <- plyr::ddply(data, groupvars, .drop = .drop,
                 .fun = function(xx, col) {
                   c(N    = length2(xx[[col]], na.rm = na.rm),
                     mean = mean   (xx[[col]], na.rm = na.rm),
                     sd   = sd     (xx[[col]], na.rm = na.rm)
                   )
                 },
                 measurevar
  )

  # Rename the "mean" column to the measure's own name
  datac <- plyr::rename(datac, c("mean" = measurevar))

  datac$se <- datac$sd / sqrt(datac$N)  # standard error of the mean

  # Confidence interval multiplier for the standard error:
  # e.g., for conf.interval = .95 use the .975 t-quantile with df = N-1
  ciMult <- qt(conf.interval/2 + .5, datac$N-1)
  datac$ci <- datac$se * ciMult

  return(datac)
}
## Norms the data within specified groups in a data frame; it normalizes each
## subject (identified by idvar) so that they have the same mean, within each group
## specified by betweenvars.
##   data: a data frame.
##   idvar: the name of a column that identifies each subject (or matched subjects)
##   measurevar: the name of a column that contains the variable to be summarized
##   betweenvars: a vector containing names of columns that are between-subjects variables
##   na.rm: a boolean that indicates whether to ignore NA's
normDataWithin <- function(data = NULL, idvar, measurevar, betweenvars = NULL,
                           na.rm = TRUE, .drop = TRUE) {
  # Fixed: namespace-qualified plyr call instead of library(plyr) -- attaching
  # plyr masks several dplyr verbs used elsewhere in this script.

  # Per-subject mean of measurevar, within each between-subjects group.
  # Measure var on left, idvar + between vars on right of formula.
  data.subjMean <- plyr::ddply(data, c(idvar, betweenvars), .drop = .drop,
                         .fun = function(xx, col, na.rm) {
                           c(subjMean = mean(xx[,col], na.rm = na.rm))
                         },
                         measurevar,
                         na.rm
  )

  # Attach the subject means to the original data (note: merge() may reorder
  # rows, matching the original cookbook-r behavior)
  data <- merge(data, data.subjMean)

  # Normalized value: remove the subject mean, add back the grand mean
  measureNormedVar <- paste(measurevar, "_norm", sep = "")
  data[,measureNormedVar] <- data[,measurevar] - data[,"subjMean"] +
    mean(data[,measurevar], na.rm = na.rm)

  # Remove the helper subject-mean column
  data$subjMean <- NULL

  return(data)
}

## Summarizes data, handling within-subjects variables by removing inter-subject variability.
## It will still work if there are no within-S variables.
## Gives count, un-normed mean, normed mean (with same between-group mean),
##   standard deviation, standard error of the mean, and confidence interval.
## If there are within-subject variables, calculate adjusted values using method from Morey (2008).
##   data: a data frame.
##   measurevar: the name of a column that contains the variable to be summarized
##   betweenvars: a vector containing names of columns that are between-subjects variables
##   withinvars: a vector containing names of columns that are within-subjects variables
##   idvar: the name of a column that identifies each subject (or matched subjects)
##   na.rm: a boolean that indicates whether to ignore NA's
##   conf.interval: the percent range of the confidence interval (default is 95%)
summarySEwithin <- function(data = NULL, measurevar, betweenvars = NULL, withinvars = NULL,
                            idvar = NULL, na.rm = TRUE, conf.interval = .95, .drop = TRUE) {
  
  # Ensure that the betweenvars and withinvars are factors (nlevels below
  # requires factors; non-factors are converted with a message)
  factorvars <- vapply(data[, c(betweenvars, withinvars), drop = FALSE],
                       FUN = is.factor, FUN.VALUE = logical(1))
  
  if (!all(factorvars)) {
    nonfactorvars <- names(factorvars)[!factorvars]
    message("Automatically converting the following non-factors to factors: ",
            paste(nonfactorvars, collapse = ", "))
    data[nonfactorvars] <- lapply(data[nonfactorvars], factor)
  }
  
  # Get the group means from the un-normed data (these are reported as-is)
  datac <- summarySE(data, measurevar, groupvars = c(betweenvars, withinvars),
                     na.rm = na.rm, conf.interval = conf.interval, .drop = .drop)
  
  # Drop the spread columns -- they will be recalculated from the normed data
  datac$sd <- NULL
  datac$se <- NULL
  datac$ci <- NULL
  
  # Norm each subject's data (removes between-subject variability)
  ndata <- normDataWithin(data, idvar, measurevar, betweenvars, na.rm, .drop = .drop)
  
  # Name of the normed column produced by normDataWithin ("<measurevar>_norm")
  measurevar_n <- paste(measurevar, "_norm", sep = "")
  
  # Collapse the normed data - now we can treat between and within vars the same
  ndatac <- summarySE(ndata, measurevar_n, groupvars = c(betweenvars, withinvars),
                      na.rm = na.rm, conf.interval = conf.interval, .drop = .drop)
  
  # Apply correction from Morey (2008) to the standard error and confidence interval
  #  Get the product of the number of conditions of within-S variables
  nWithinGroups    <- prod(vapply(ndatac[,withinvars, drop = FALSE], FUN = nlevels,
                                  FUN.VALUE = numeric(1)))
  correctionFactor <- sqrt( nWithinGroups / (nWithinGroups-1) )
  
  # Apply the correction factor to the normed spread measures
  ndatac$sd <- ndatac$sd * correctionFactor
  ndatac$se <- ndatac$se * correctionFactor
  ndatac$ci <- ndatac$ci * correctionFactor
  
  # Combine the un-normed means with the normed (corrected) sd/se/ci;
  # merge() joins on the shared grouping columns
  merge(datac, ndatac)
}

# function that returns ICC 
# Format an ICC result (an object with elements value, lbound, ubound, e.g.
# from irr::icc) as an APA-style string: "ICC = x, 95% CI [lower, upper]".
#   places: number of decimal places for rounding
reporticc <- function(output, places) {
  est <- round(output$value, places)
  lo  <- round(output$lbound, places)
  hi  <- round(output$ubound, places)
  paste0("ICC = ", est, ", 95% CI [", lo, ", ", hi, "]")
}

# function that returns APA-formatted result from lme4/lmerTest table

# version 1 that reports ci, b, beta, se, p
# APA-formatted result string from a model-results table (version 1: reports
# CI, standardized beta, unstandardized B, SE, and p).
#   table:  data frame with columns lower, upper, beta, est, se, p
#   index:  row of the table to report
#   places: decimal places for rounding the estimates
#   tails:  "1" halves the (already rounded) p value and labels "one-tailed";
#           anything else reports it as-is, labelled "two-tailed"
#   flip:   if supplied (any value), signs of the CI, beta, and B are reversed
report <- function(table, index, places, tails, flip) {
  p.value <- round(table$p[index], 3)  # p values always rounded to 3 places
  if (tails == "1") {
    p.value <- p.value / 2
    howmanytails <- "one-tailed"
  } else {
    howmanytails <- "two-tailed"
  }
  # leading zero stripped per APA style; very small p reported as "<.001"
  p <- if (p.value < .001) "<.001" else paste0(" = ", str_remove(p.value, "^0+"))
  flipped <- !missing(flip)
  # flipping negates and swaps the CI bounds and negates the estimates
  lo <- if (flipped) -round(table$upper[index], places) else round(table$lower[index], places)
  hi <- if (flipped) -round(table$lower[index], places) else round(table$upper[index], places)
  b  <- round(table$beta[index], places)
  B  <- round(table$est[index], places)
  if (flipped) {
    b <- -b
    B <- -B
  }
  paste0("[", lo, ",", hi, "], ß = ", b, ", B = ", B,
         ", SE = ", round(table$se[index], places), ", p", p, ", ", howmanytails)
}

# version 2, more condensed, that reports ci, beta, t(df), p
# APA-formatted result string, condensed version 2: reports CI, standardized
# beta, t(df), and p.
#   table:  data frame with columns lower, upper, beta, df, t, p
#   index:  row of the table to report
#   places: decimal places for rounding (df is always rounded to 2)
#   tails:  "1" halves the (already rounded) p value and labels "one-tailed";
#           anything else reports it as-is, labelled "two-tailed"
#   flip:   if supplied (any value), signs of the CI, beta, and t are reversed
report2 <- function(table, index, places, tails, flip) {
  p.value <- round(table$p[index], 3)  # p values always rounded to 3 places
  if (tails == "1") {
    p.value <- p.value / 2
    howmanytails <- "one-tailed"
  } else {
    howmanytails <- "two-tailed"
  }
  # leading zero stripped per APA style; very small p reported as "<.001"
  p <- if (p.value < .001) "<.001" else paste0(" = ", str_remove(p.value, "^0+"))
  flipped <- !missing(flip)
  lo <- if (flipped) -round(table$upper[index], places) else round(table$lower[index], places)
  hi <- if (flipped) -round(table$lower[index], places) else round(table$upper[index], places)
  b  <- round(table$beta[index], places)
  tv <- round(table$t[index], places)
  if (flipped) {
    b  <- -b
    tv <- -tv
  }
  paste0("[", lo, ",", hi, "], ß = ", b, ", t(", round(table$df[index], 2),
         ") = ", tv, ", p", p, ", ", howmanytails)
}
## get within-subjects CIs for plotting

# Within-subjects (Morey-corrected) summary of looking times for plotting.
# Warning about NaN has to do with missing observations for control events.
summary.avg <- summarySEwithin(data = risk.avg, measurevar = "look", betweenvars = c("exp"), withinvars = c("type", "phase"), idvar = "subj") %>%
  drop_na() %>%  
  mutate(cliff = type)
# Collapse the four type levels into deep/shallow (assumes the alphabetical
# level order deep, higher, lower, shallow, so higher -> deep and
# lower -> shallow -- TODO confirm against levels(summary.avg$type))
levels(summary.avg$cliff) <- c("deep", "deep", "shallow", "shallow")
levels(summary.avg$phase) <- c("control", "test")
# figure out how many looks are missing from the dataframe, per experiment
# (look appears to be character here, with "<NA>" strings as well as true
# NAs -- hence the str_detect check; verify against `wide`)
nexclude <- wide %>%
   gather(type, look, control_1:test4) %>%
   filter(cost == "Danger") %>%
   mutate(missing = case_when(is.na(look) | str_detect(look, "NA") ~ 1)) %>%
   group_by(exp) %>%
   count(missing) %>%
  filter(!is.na(missing)) %>%
  rename(n_missing = n) %>%
  select(!missing)

# Experiment labels (defined for use elsewhere; not referenced in this chunk)
experiments <- c("Exp.1", "Exp.2", "Exp.3")
# Total trial counts per experiment, long format (one row per trial)
totaltrials.Exp1 <- wide %>%
   gather(type, look, fam1:test4) %>% # no control trials
   filter(cost == "Danger") %>%
   filter(exp == "Exp.1") %>%
   group_by(exp) %>%
   tally() %>%
   rename(total = n)

# Experiments 2-3 additionally had control trials
totaltrials.Exp23 <- wide %>%
   gather(type, look, control_1:test4) %>%
   filter(cost == "Danger") %>%
   filter(exp == "Exp.2" | exp == "Exp.3") %>%
   group_by(exp) %>%
   tally() %>%
   rename(total = n)

# Missing counts joined to totals; na.omit drops experiments without both
ntrials <- full_join(nexclude, rbind(totaltrials.Exp1, totaltrials.Exp23)) %>% na.omit()

Reliability + Distribution Info

# Inter-coder reliability: de-identified looking-time codes, split by experiment
rel <- read.csv(file = "peril_reliability_deid.csv", header = TRUE)
exp1 <- rel %>% filter(experiment.paper == "Exp1")
exp2 <- rel %>% filter(experiment.paper == "Exp2")
exp3 <- rel %>% filter(experiment.paper == "Exp3")
exp4.study1 <- rel %>% filter(experiment.paper == "Exp4.Study1")
exp4.study2 <- rel %>% filter(experiment.paper == "Exp4.Study2")
exp4.study3 <- rel %>% filter(experiment.paper == "Exp4.Study3")

# One-way agreement ICCs between the secondary coder's and the original
# coder's looking times (icc presumably from the irr package -- TODO confirm)
exp1rel <- icc(data.frame(exp1$secondary.look, exp1$orig.look),model = "one", type = "agreement")

exp2rel <- icc(data.frame(exp2$secondary.look, exp2$orig.look),model = "one", type = "agreement")

exp3rel <- icc(data.frame(exp3$secondary.look, exp3$orig.look),model = "one", type = "agreement")

exp4.study1.rel <- icc(data.frame(exp4.study1$secondary.look, exp4.study1$orig.look),model = "one", type = "agreement")

exp4.study2.rel <- icc(data.frame(exp4.study2$secondary.look, exp4.study2$orig.look),model = "one", type = "agreement")

exp4.study3.rel <- icc(data.frame(exp4.study3$secondary.look, exp4.study3$orig.look),model = "one", type = "agreement")
# Compare fit of normal vs lognormal distributions to the looking times
# (fitdistr presumably MASS -- TODO confirm); used to justify log transform
normal.ll <- fitdistr(na.omit(risk.avg$look), "normal")$loglik
lognormal.ll <- fitdistr(na.omit(risk.avg$look), "lognormal")$loglik

Figures

# Global plot theme (cowplot)
theme_set(theme_cowplot(font_size = 20))

# Experiment 1 figure data: test-phase averages from the infer-value task
exp1.fig.data <- risk.avg %>% filter(task == "infer.value",
                                           phase == "testavg")
# Keep only higher/lower levels; other two type levels become NA
# (assumes the original level order puts higher and lower 2nd and 3rd --
# TODO confirm against levels(exp1.fig.data$type))
levels(exp1.fig.data$type) <- c(NA, "higher", "lower", NA)
exp1.fig.data$type <- relevel(exp1.fig.data$type, ref = "higher")
colors1 <- c(wes_palettes$Zissou1[3], wes_palettes$Zissou1[2])

# Boxplots of looking time by test-event type, with red mean points,
# within-subjects CIs (from summary.avg), and per-subject points/lines
risk1 <- ggplot(data = exp1.fig.data %>% filter(exp == "Exp.1"), aes(type, look, fill = type)) +
  geom_boxplot() +
  scale_fill_manual(values = colors1) +
    geom_errorbar(data = summary.avg %>% filter(exp == "Exp.1"), colour = "red", position = position_dodge(width = 5), width = 0, aes(ymin = look-ci, ymax = look+ci)) +
  stat_summary(fun = mean, alpha = 0.8, geom = "point", shape = 21, size = 3, position = "dodge", colour = "red", fill = "red") +
  ylab("Looking Time (s)") +
  xlab("Test event") +
  coord_cartesian(ylim = c(0, 65)) +
  geom_point(alpha = 0.1) +
  geom_line(alpha = 0.2, aes(group = subj)) +
  theme(legend.position = "none") +
  scale_x_discrete(labels = c("higher\nvalue", "lower\nvalue"))
  # annotate("text", colour = "red", x = 1.5, y = 63, size = 30, label = c("*ß = 0.354"))

risk1

Figure 2. Looking time towards test events in Experiment 2.

# Combined figure data for Experiments 2-3: recode event types into
# deep/shallow cliff categories
exp23.figure <- rbind(exp2.avg,exp3.avg) %>%
  mutate(cliff = case_when(type == "deep" | type == "higher" ~ "deep",
                           type == "shallow" | type == "lower" ~ "shallow"))
exp23.figure$cliff <- as.factor(exp23.figure$cliff)
exp23.figure$cliff <- relevel(exp23.figure$cliff, ref = "shallow")
exp23.figure$phase <- as.factor(exp23.figure$phase)

# Sanity check of factor coding (output below)
levels(exp23.figure$cliff)
## [1] "shallow" "deep"
levels(exp23.figure$phase) <- c("control", "test")
exp23.colors <- c(wes_palette("Royal2")[2], wes_palette("Royal2")[1])

# Experiment 2: looking time by cliff depth, faceted by phase; red points are
# means, red bars are within-subjects CIs from summary.avg
exp2.figure <- ggplot(data = exp23.figure %>% filter(exp == "Exp.2"), aes(cliff, look, fill = cliff)) +
  geom_boxplot(aes(alpha = phase)) +
  stat_summary(fun = mean, alpha = 0.8, geom = "point", shape = 21, size = 3, position = "dodge", colour = "red", fill = "red") +
  geom_errorbar(data = summary.avg %>% filter(exp == "Exp.2"), colour = "red", position = position_dodge(width = 5), width = 0, aes(ymin = look-ci, ymax = look+ci)) +
  ylab("Looking Time (s)") +
  xlab("Cliff Depth") +
  coord_cartesian(ylim = c(0, 65)) +
  geom_point(alpha = 0.1) +
  geom_line(alpha = 0.2, aes(group = subj)) +
  facet_wrap(~phase, nrow = 1) +
  theme(legend.position = "none") +
  scale_fill_manual(values = exp23.colors) +
  scale_alpha_discrete(range = c(0.4, 1)) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
  labs(subtitle = "Experiment 2")

# Experiment 3: same plot structure as Experiment 2
exp3.figure <- ggplot(data = exp23.figure %>% filter(exp == "Exp.3"), aes(cliff, look, fill = cliff)) +
  geom_boxplot(aes(alpha = phase)) +
  stat_summary(fun = mean, alpha = 0.8, geom = "point", shape = 21, size = 3, position = "dodge", colour = "red", fill = "red") +
  geom_errorbar(data = summary.avg %>% filter(exp == "Exp.3"), colour = "red", position = position_dodge(width = 5), width = 0, aes(ymin = look-ci, ymax = look+ci)) +
  ylab("Looking Time (s)") +
  xlab("Cliff Depth") +
  coord_cartesian(ylim = c(0, 65)) +
  geom_point(alpha = 0.1) +
  geom_line(alpha = 0.2, aes(group = subj)) +
  facet_wrap(~phase, nrow = 1) +
  theme(legend.position = "none") +
  scale_fill_manual(values = exp23.colors) +
  scale_alpha_discrete(range = c(0.4, 1)) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
  labs(subtitle = "Experiment 3")


# Combined two-panel figure with A/B tags (patchwork)
exp2.figure + exp3.figure + plot_annotation(tag_levels = 'A')

Figure 4. Looking times from Experiments 2 (A, in-lab) and 3 (B, online) during the control events (lighter) and the test events (darker).

Experiment 1: Inferring value from risk

# Experiment 1 confirmatory analysis: higher-value event is the reference level
exp1.avg$type <- relevel(exp1.avg$type, ref = "higher")


# Null (intercept-only) and test models for log looking time
exp1.0 <- lmer(loglook ~ 1 + (1|subj),
               data = exp1.avg)

exp1.1 <- lmer(loglook ~ type + (1|subj),
               data = exp1.avg)

# Influence check (Cook's distance, 4/n cutoff with n = 32 subjects);
# per the comment, no influential observations
plot(influence(exp1.1, "subj"), which = "cook",
     cutoff = 4/32, sort = TRUE,
     xlab = "Cook´s Distance",
     ylab = "Subject ID")

# NOTE(review): the comment above says "no influential observations", yet
# subject S4_13 is filtered out here and in the d-score below, while the
# reported table/CI use the full-data model exp1.1 -- confirm which is intended
exp1.1.cooks <- lmer(loglook ~ type + (1|subj),
               data = exp1.avg %>% filter(subj != "S4_13"))
exp1.1.table <- gen.m(exp1.1)
exp1.1.ci <- gen.ci(exp1.1)[3:4,]
# Standardized betas from the same model fit on scale()'d outcome
exp1.1.beta <- lmer(scale(loglook) ~ type + (1|subj),
               data = exp1.avg)
exp1.1.betas <- gen.beta(exp1.1.beta)
exp1.results <- cbind(exp1.1.table, exp1.1.betas,exp1.1.ci)

# effect size (Cohen's d via EMAtools::lme.dscore)
exp1.cohensd <- lme.dscore(exp1.1,
           data = exp1.avg %>% filter(subj != "S4_13"),
           type = "lme4") %>% select(d) %>% as.numeric()

Methods

Participants

Our final sample of participants included 32 thirteen-month-old infants (M = 12.89 months, range = 12.57-13.47, 17 female). Seven infants were excluded and replaced due to fussiness (3 infants) or inattentiveness during test trials (4 infants). Participants were recruited through a database of families who expressed interest in cognitive development research in the Boston area. Of the families in this database who chose to provide demographic information, 79.5% identified their children as White, 10.2% as Asian, 6.9% as Other, 2.5% as Black or African American, 0.4% as American Indian/Alaska Native, and 0.4% as Native Hawaiian/Pacific Islander; 90.3% as not Hispanic or Latino, 9.5% as Hispanic or Latino, and 0.2% as both. Most families in the database (90.4%) had at least one parent or legal guardian with a college diploma or higher. All data were collected at the Harvard Laboratory for Developmental Studies with procedures approved by the Committee on the Use of Human Subjects. We studied 13-month-old infants, rather than the 10-month-old infants tested in our past research [13], because the younger infants lack experiences with walking and falling that may foster the development of these abilities. The sample size was chosen based on a simulation power analysis over the confirmatory analyses from 2 previous experiments with similar structure, conducted with 10-month-old infants: Experiments 1-2 from [13], and we collected data until we attained our pre-specified N. The full pre-registration document, including full details about methods, sample size, hypotheses, and analysis plan, is available at https://osf.io/bfvdc/files/.

Data Coding and Analysis Strategy.

Infant looking times were coded online using XHAB (Pinto, 1995), and offline using Datavyu (Datavyu Team, 2014). All experimenters and coders were naive to the order of the test events and unable to see the video events (they relied on sound cues to start each trial). To check for exclusions and coding errors, all test trial data were re-coded in Datavyu and excluded if an infant looked away from a test event without ever having seen the agent jump, or if the trial ended too early or late (15 out of 320 total familiarization trials). We used these offline coded looking times for our final analyses. To assess the reliability of the data, half of the trials (160 out of 320) were re-coded in Datavyu by an additional researcher who was naive to test event order. Reliability was high, ICC = 0.97, 95% CI [0.95, 0.98]. All decisions to include or exclude trials or participants from our analysis were made by researchers who did not know the order of events shown to that infant.

Infant looking times often are log-normally distributed, including in this dataset (log-likelihood of average looking times during test and control trials for Experiments 1-3 under normal distribution = -2624.45, under lognormal distribution = -2456.77). Our pre-registered dependent measure therefore was the average looking time towards the higher- or lower-danger choice at test in log seconds. We report the values of unstandardized B coefficients and 95% confidence intervals in this unit, but our summary statistics and plots feature untransformed looking times for interpretability. We analyzed all looking times using mixed effects models (Bates et al., 2015) implemented in R (R Core Team, 2020). Analyses with repeated measures included a random intercept for participant identity; those conducted over multiple experiments included a random intercept for experiment. For every model, we checked for influential participants using Cook’s Distance (Nieuwenhuis et al., 2012) and excluded participants who exceeded the standard 4/n threshold, where n is the number of participants. The number of participants who met this criterion is listed in every model result; including or excluding them does not change the interpretation of any primary analysis (for results including all observations, see SOM). Data manipulation and plotting were conducted using tidyverse packages (Wickham et al., 2019). Cohen’s D derived from lme models was calculated using the EMAtools package (Kleiman, 2017). To enhance reproducibility, all results were written in R Markdown (Xie et al., 2018).

Results

Pre-registered results.

Infants looked longer when the agent chose the target achieved through the less dangerous action (Mlowervalue = 24.6s, pooled standard error (SE) = 1.14) than when the agent chose the target achieved through the more dangerous action (Mhighervalue = 21.48s, SE = 1.14 , 95% confidence interval (CI) over difference in log seconds [0.02,0.41], ß = 0.35, t(31) = 2.18, p = .037, two-tailed, Cohen’s d = 0.8, no influential participants). As in the experiments of Liu et al. (2017) using closely similar methods, but presenting physically different actions on the two test trials, infants looked longer when this expected outcome did not occur.

Experiment 2: Minimizing Risk

# Experiment 2: test-phase averages only
exp2.test <- exp2.avg %>% filter(phase == "testavg") 

# Demographics (age, sex, N) for the Exp. 2 sample
exp2.info <- info(exp2.avg)

# Null and test models for log looking time
exp2.0 <- lmer(loglook ~ 1 + (1|subj),
               data = exp2.test)

exp2.1 <- lmer(loglook ~ type + (1|subj),
               data = exp2.test)

# id influential observations (Cook's distance, 4/n with n = 30 subjects)
plot(influence(exp2.1, "subj"), which = "cook",
     cutoff = 4/30, sort = TRUE,
     xlab = "Cook´s Distance",
     ylab = "Subject ID")

# one influential observation (subject 24-MR); refit without it

exp2.1.cooks <- lmer(loglook ~ type + (1|subj),
               data = exp2.test %>% filter(subj != "24-MR"))
exp2.1.table <- gen.m(exp2.1.cooks)
exp2.1.ci <- gen.ci(exp2.1.cooks)[3:4,]

# Standardized betas: same model on scale()'d outcome, same exclusion
exp2.1.beta <- lmer(scale(loglook) ~ type + (1|subj),
               data = exp2.test %>% filter(subj != "24-MR"))
exp2.1.betas <- gen.beta(exp2.1.beta)

exp2.results <- cbind(exp2.1.table, exp2.1.betas, exp2.1.ci)

# effect size (sign flipped so positive d = longer looks at deep choice --
# presumably to match the reporting direction; confirm)
exp2.cohensd <- lme.dscore(exp2.1.cooks,
           data = exp2.test %>% filter(subj != "24-MR"),
           type = "lme4") %>% select(d) %>% as.numeric() * -1
# Experiment 2: control-phase averages only
exp2.control <- exp2.avg %>% filter(phase == "control") 

exp2.2 <- lmer(loglook ~ type + (1|subj),
               data = exp2.control)

# id influential observations (Cook's distance, 4/n with n = 30 subjects)
plot(influence(exp2.2, "subj"), which = "cook",
     cutoff = 4/30, sort = TRUE,
     xlab = "Cook´s Distance",
     ylab = "Subject ID")

# two influential observations (subjects 59-MR and 54-MR); refit without them

exp2.cooks <- lmer(loglook ~ type + (1|subj),
               data = exp2.control %>% filter(subj != "59-MR" & subj != "54-MR"))

exp2.table <- gen.m(exp2.cooks)
exp2.ci <- gen.ci(exp2.cooks)[3:4,]

# Standardized betas: same model on scale()'d outcome, same exclusions
exp2.beta <- lmer(scale(loglook) ~ type + (1|subj),
               data = exp2.control %>% filter(subj != "59-MR" & subj != "54-MR"))
exp2.betas <- gen.beta(exp2.beta)

exp2.results2 <- cbind(exp2.table, exp2.betas, exp2.ci)

# effect size (sign flipped; see note on exp2.cohensd above -- confirm direction)
exp2.control.cohensd <- lme.dscore(exp2.cooks,
           data = exp2.control %>% filter(subj != "59-MR" & subj != "54-MR"),
           type = "lme4") %>% select(d) %>% as.numeric() * -1
# Experiment 2: control + test phases together, with events recoded into
# deep/shallow cliff categories for the phase-by-cliff interaction
exp2.control.test <- exp2.avg %>%
  mutate(cliff = case_when(type == "deep" | type == "higher" ~ "deep",
                           type == "shallow" | type == "lower" ~ "shallow"))

  
exp2.control.test$type <- as.factor(exp2.control.test$type)

# Interaction model: does the deep/shallow looking preference differ by phase?
exp2.3 <- lmer(loglook ~ cliff * phase + (1|subj),
                     data = exp2.control.test)

# Influence check (Cook's distance, 4/n with n = 30 subjects)
plot(influence(exp2.3, "subj"), which = "cook",
     cutoff = 4/30, sort = TRUE,
     xlab = "Cook´s Distance",
     ylab = "Subject ID")

# no influential observations

# Pairwise deep-vs-shallow contrasts within each phase (output below)
lsmeans(exp2.3, pairwise ~ cliff | phase)
## $lsmeans
## phase = control:
##  cliff   lsmean    SE  df lower.CL upper.CL
##  deep      2.40 0.116 100     2.17      2.6
##  shallow   2.61 0.112  97     2.39      2.8
## 
## phase = testavg:
##  cliff   lsmean    SE  df lower.CL upper.CL
##  deep      3.11 0.112  97     2.89      3.3
##  shallow   2.83 0.112  97     2.60      3.0
## 
## Degrees-of-freedom method: kenward-roger 
## Confidence level used: 0.95 
## 
## $contrasts
## phase = control:
##  contrast       estimate    SE df t.ratio p.value
##  deep - shallow   -0.212 0.141 86 -1.500  0.1360 
## 
## phase = testavg:
##  contrast       estimate    SE df t.ratio p.value
##  deep - shallow    0.286 0.138 85  2.070  0.0410 
## 
## Degrees-of-freedom method: kenward-roger
# Coefficient table and CIs for the interaction model (rows 3:6 of confint
# skip the random-effect and sigma rows)
exp2.3.table <- gen.m(exp2.3)
exp2.3.ci <- gen.ci(exp2.3)[3:6,]

# Standardized betas: same model on scale()'d outcome
exp2.3.beta <- lmer(scale(loglook) ~ cliff * phase + (1|subj),
                     data = exp2.control.test)
exp2.3.betas <- gen.beta(exp2.3.beta)

exp2.results3 <- cbind(exp2.3.table,exp2.3.betas,exp2.3.ci)

# effect size for the interaction term (3rd row of the d-score table;
# sign flipped -- confirm direction)
exp2.interaction.cohensd <- lme.dscore(exp2.3,
           data = exp2.control.test,
           type = "lme4") %>% slice(3) %>% select(d) %>% as.numeric() * -1

This study was originally pre-registered with a sample including both 10-month-old and 13-month-old infants. Because our investigation with 10-month-old infants is still ongoing, we deviate from our pre-registration by reporting only results from the older age group. Data from both age groups are open access at https://osf.io/kz7br/.

Methods

Participants

Our final sample of participants included 30 thirteen-month-old infants (M = 12.89 months, range = 12.53-13.5, 12 female). We chose this sample size using a simulation power analysis over the confirmatory analysis of data from a pilot study, as well as estimates of effect sizes of studies with similar displays and design (S. Liu et al., 2017; S. Liu & Spelke, 2017). Our pre-registration document is available at https://osf.io/efc3g/. We collected data until we attained our pre-specified N. Infants were excluded and replaced in the final sample due to fussiness that prevented study completion (3 infants), inattentiveness during test trials (2 infants), or interference from caregivers (2 infants).

Data Coding and Analysis

The data coding and analysis strategies were the same as in Experiment 1. Twenty-five out of 360 total familiarization, control, and test trials were excluded from the analysis based on inattentiveness or coding error. Half the test trials from the experiment (60/120 trials) were re-coded in Datavyu by an additional researcher who was naive to test event order. Reliability was high, ICC = 1, 95% CI [1, 1].

Results

Infants looked longer when the agent, at test, chose to cross the deeper over the shallower trench (Mdeep = 26.5s, SE = 1.61; Mshallow = 21.64s, SE = 1.95; [0.03,0.43], ß = 0.36, t(28) = 2.33, p = .0135, one-tailed, d = 0.88, excluding one influential participant).

In contrast, when infants’ attention was drawn to each trench by an attention-getting star that appeared in the path of the agent’s subsequent actions, infants looked longer at events near the shallow trench (Mdeep = 12.73s, SE = 1.11; Mshallow = 16.02s, SE = 2; [-0.31,-0.08], ß = -0.34, t(25.1) = -3.24, p = .003, two-tailed, d = -1.29,excluding 2 influential participants). Looking preferences between the control and test events differed significantly ([0.11,0.88], ß = 0.75, t(84.74) = 2.52, p = .013, two-tailed, d = 0.55, no influential observations). See Figure 4A.

Experiment 3: Minimizing risk replication (no shattering)

# Experiment 3 (online replication): recode events into deep/shallow categories
exp3.control.test <- exp3.avg %>%
  mutate(cliff = case_when(type == "deep" | type == "higher" ~ "deep",
                           type == "shallow" | type == "lower" ~ "shallow"))

# Test-phase averages only
exp3.avg.test <- exp3.avg %>% filter(phase == "testavg") 

# Demographics (age, sex, N) for the Exp. 3 sample
exp3.info <- info(exp3.avg)

# Null and test models for log looking time
exp3.0 <- lmer(loglook ~ 1 + (1|subj),
               data = exp3.avg.test)

exp3.1 <- lmer(loglook ~ type + (1|subj),
               data = exp3.avg.test)

# id influential observations (Cook's distance, 4/n with n = 42 subjects)
plot(influence(exp3.1, "subj"), which = "cook",
     cutoff = 4/42, sort = TRUE,
     xlab = "Cook´s Distance",
     ylab = "Subject ID")

# no influential observations, so the full-data model is reported

exp3.1.table <- gen.m(exp3.1)
exp3.1.ci <- gen.ci(exp3.1)[3:4,]

# Standardized betas: same model on scale()'d outcome
exp3.1.beta <- lmer(scale(loglook) ~ type + (1|subj),
               data = exp3.avg.test)
exp3.1.betas <- gen.beta(exp3.1.beta)

exp3.results <- cbind(exp3.1.table, exp3.1.betas, exp3.1.ci)

# effect size (the `* 1` is a no-op, kept to parallel the sign-flipped
# computations in Exp. 2 -- presumably deliberate; confirm)
exp3.cohensd <- lme.dscore(exp3.1,
           data = exp3.avg.test,
           type = "lme4") %>% select(d) %>% as.numeric() * 1
# Control-phase trials for Experiment 3.
exp3.control <- exp3.control.test %>% filter(phase == "control")

exp3.2 <- lmer(loglook ~ type + (1|subj),
               data = exp3.control)

# id influential observations (Cook's distance, cutoff 4/N with N = 42)
plot(influence(exp3.2, "subj"), which = "cook",
     cutoff = 4/42, sort = TRUE,
     xlab = "Cook´s Distance",
     ylab = "Subject ID")

# two influential observations (subjects 26 and 28); refit without them

exp3.2.cooks <- lmer(loglook ~ type + (1|subj),
               data = exp3.control %>% filter(subj != "26" & subj != "28"))


exp3.2.table <- gen.m(exp3.2.cooks)
exp3.2.ci <- gen.ci(exp3.2.cooks)[3:4,]

# Standardized coefficients from the scaled-outcome refit.
exp3.2.beta <- lmer(scale(loglook) ~ type + (1|subj),
               data = exp3.control %>% filter(subj != "26" & subj != "28"))
# FIX: gen.beta was called on the unscaled model (exp3.2.cooks), leaving
# exp3.2.beta fitted but unused; every parallel analysis in this script
# passes the scale(loglook) model, so that pattern is restored here.
exp3.2.betas <- gen.beta(exp3.2.beta)

exp3.results2 <- cbind(exp3.2.table, exp3.2.betas, exp3.2.ci)

# effect size; sign flipped (* -1) for the reporting convention in the text
exp3.control.cohensd <- lme.dscore(exp3.2.cooks,
           data = exp3.control %>% filter(subj != "26" & subj != "28"),
           type = "lme4") %>% select(d) %>% as.numeric() * -1
# Control-vs-test interaction: does the looking preference differ between
# the control and test phases?
exp3.control.test$type <- as.factor(exp3.control.test$type)

exp3.3 <- lmer(data = exp3.control.test,
               formula = loglook ~ cliff * phase + (1|subj))

# Cook's distance screen for influential infants (cutoff 4/N, N = 42).
plot(influence(exp3.3, "subj"), which = "cook",
     cutoff = 4/42, sort = TRUE,
     xlab = "Cook´s Distance",
     ylab = "Subject ID")

# 2 influential observations (subjects 26 and 28); refit without them

exp3.3.cooks <- lmer(data = exp3.control.test %>% filter(subj != "28" & subj != "26"),
                     formula = loglook ~ cliff * phase + (1|subj))

# Fixed-effect estimates and 95% CIs (rows 3:6 = the four fixed effects).
exp3.3.table <- gen.m(exp3.3.cooks)
exp3.3.ci <- gen.ci(exp3.3.cooks)[3:6,]

# Standardized coefficients from the scaled-outcome refit.
exp3.3.beta <- lmer(data = exp3.control.test %>% filter(subj != "28" & subj != "26"),
                    formula = scale(loglook) ~ cliff * phase + (1|subj))
exp3.3.betas <- gen.beta(exp3.3.beta)

exp3.results3 <- cbind(exp3.3.table, exp3.3.betas, exp3.3.ci)

# Estimated marginal means and deep-vs-shallow contrasts within each phase
# for the refit interaction model (knitr output follows).
lsmeans(exp3.3.cooks, pairwise ~ cliff | phase)
## $lsmeans
## phase = control:
##  cliff   lsmean    SE  df lower.CL upper.CL
##  deep      2.37 0.089 136     2.20     2.55
##  shallow   2.55 0.089 136     2.37     2.72
## 
## phase = testavg:
##  cliff   lsmean    SE  df lower.CL upper.CL
##  deep      2.98 0.081 132     2.82     3.14
##  shallow   2.73 0.081 132     2.57     2.89
## 
## Degrees-of-freedom method: kenward-roger 
## Confidence level used: 0.95 
## 
## $contrasts
## phase = control:
##  contrast       estimate    SE  df t.ratio p.value
##  deep - shallow   -0.172 0.116 104 -1.490  0.1400 
## 
## phase = testavg:
##  contrast       estimate    SE  df t.ratio p.value
##  deep - shallow    0.247 0.105 104  2.340  0.0210 
## 
## Degrees-of-freedom method: kenward-roger
# Effect size for the cliff x phase interaction (row 3 of the d table);
# sign flipped (* -1) for the reporting convention used in the text.
exp3.interaction.cohensd <- -1 * (lme.dscore(exp3.3.cooks,
    data = exp3.control.test %>% filter(subj != "28" & subj != "26"),
    type = "lme4") %>% slice(3) %>% select(d) %>% as.numeric())
# pairwise0 <- lsmeans(exp3.3.cooks, list(pairwise~cliff|phase)) 
# 
# pairwise.beta <- lsmeans(exp3.3.beta, list(pairwise~cliff|phase)) 
# 
# pairwise.beta.value <- pairwise.beta[[2]] %>% as.data.frame() %>% select(contrast, phase, estimate) %>%
#   rename(beta = estimate)
# 
# pairwise.CI <- confint(pairwise0[[2]]) %>% as.data.frame()
# 
# pairwise.t.p <- pairwise0[[2]] %>% as.data.frame()
# 
# within.exp3.almost <- full_join(pairwise.CI, pairwise.t.p) %>%
#   rename(est = estimate,
#          se = SE,
#          lower = lower.CL,
#          upper = upper.CL,
#          t = t.ratio,
#          p = p.value)
# 
# within.exp3 <- full_join(pairwise.beta.value, within.exp3.almost)
# Online-sample descriptives for Experiment 3: devices used, seating
# arrangement, and caregiver ratings of audio/video quality.
exp3.devices <- tabyl(filter(wide, exp == "Exp.3"), device)

exp3.highchair <- tabyl(filter(wide, exp == "Exp.3"), highchair)

exp3.qualratings <- summarise(filter(wide, exp == "Exp.3"),
                              vquality = mean(video_quality, na.rm = TRUE),
                              vquality.sd = sd(video_quality, na.rm = TRUE),
                              aquality = mean(audio_quality, na.rm = TRUE),
                              aquality.sd = sd(audio_quality, na.rm = TRUE))

Methods

Participants

Our final sample included 42 twelve- to fifteen-month-old infants (M = 13.95 months, range = 12.29-15.67, 24 female): a widened age range that enabled more rapid testing of participants, who were recruited both from our lab database and through a cross-institution platform for recruitment for developmental cognitive science (https://childrenhelpingscience.com/). Our preregistered target sample size of 40 was determined based on a simulation power analysis over infants’ looking preferences towards the test events from Experiment 2; our stopping rule was to stop recruiting as soon as we reached our target N, but to finish collecting data if we over-recruited. Thus, our final sample was N = 42. A further 6 infants were excluded from the study (3 due to technical issues, 2 due to inattentiveness and 1 due to interference from the caregiver). Our pre-registration document is available at https://osf.io/96qsf/.

Procedure

Whereas Experiments 1 and 2 were conducted in a quiet, dark room in a lab setting, Experiment 3 was conducted over Zoom video conferencing, in infants’ homes, due to the COVID-19 pandemic, following procedures approved by the Committee on the Use of Human Subjects at Harvard University. We used materials developed by the Stanford Social Learning Lab (Social Learning Lab, 2020) to introduce caregivers to the online testing setup and to ask for verbal consent. Caregivers also provided written consent prior to the study session. Infants sat in a high chair (25 out of 42 participants) or their caregivers’ laps (17/42), depending on caregiver preferences, and watched the displays on a tablet (8/42) or a laptop computer (34/42). We asked caregivers, both before and during the study, to minimize distractions (pets, people walking by, and distracting objects) during the study session.

Before the experiment, infants saw a calibration video where their attention was drawn to the four corners of the screen, as well as the center of the screen. To maximize the quality of the events seen by infants, we shared our stimuli with caregivers through YouTube playlists, controlled the caregiver’s screen using Zoom’s remote control feature, and coded infants’ looking times during the study using jHab (Casstevens, 2007). Caregivers rated the quality of the audio and video on a 5-point Likert scale (1 = very poor; 5 = very good), giving high ratings, on average, for both (video: M = 4.88, SD = 0.33; audio: M = 4.85, SD = 0.36). After the session, we double checked for trial exclusions and generated the final data from the recording of the session video using Datavyu (Datavyu Team, 2014). As before, experimenters only had access to the video feed of infants’ faces (and not the displays) during the experiment, and therefore were unaware of the order of test events. To allow caregivers to attend to safety issues at home, we did not ask them to close their eyes, and instead instructed them to refrain from directing their infants’ attention toward or away from the screen. Our full online testing protocol is described in the SOM.

Data Coding and Analysis

The data coding and analysis strategy was identical to Experiment 2. Fifty-three out of 504 total trials (including familiarization, test, and control trials) were excluded from analysis because of inattentiveness, distractions at home (e.g. pet noises, people walking by), technical issues and coding errors. The proportion of excluded trials (10.52%) was higher than what we observed in the lab in Experiment 2 (6.94%), due to distractions in the home environment, the smaller size of the screen displaying the videos at home, and the lower or more variable quality of the video feeds of the infants’ faces (which led to trial mis-timings). As in Experiments 1-2, 50% of the test trials were recoded by an additional naive coder (84 of 168 test trials). Interrater reliability was high, ICC = 0.96, 95% CI [0.93, 0.97].

Results

Pre-registered results

We fully replicated the two key results from Experiment 2. Infants looked longer at test when the agent chose to jump the deeper trench (Mdeep = 22.35s, SE = 1.26; Mshallow = 17.55s, SE = 1; [0.09,0.41], ß = 0.47, t(41) = 3.06, p = .002, one-tailed, d = -0.96, no influential participants). Infants’ looking preferences between the control events and the test events significantly differed from each other ([0.11,0.72], ß = 0.75, t(101.37) = 2.67, p = .009, two-tailed, d = 0.53, excluding 1 influential participant).

Exploratory results

During the control events, infants showed a numerical but non-significant preference for the event in which the inanimate object appeared over the shallower trench (Mdeep = 12.09s, SE = 1.27; Mshallow = 13.9s, SE = 1.48; [-0.42,0.07], ß = -0.17, t(64) = -1.39, p = .171, two-tailed, d = -0.35, excluding 2 influential participants). See Figure 4A.

Experiment 4, Studies 1-3 (10mo infants)

Study 1: Inferring value from risk (10-month-old infants)

# Study 1 (10-month-olds): null and event-type models over averaged
# test trials, with a random intercept per infant.
exp1b.0 <- lmer(data = exp4.study1.avg,
                formula = loglook ~ 1 + (1|subj))

exp1b.1 <- lmer(data = exp4.study1.avg,
                formula = loglook ~ type + (1|subj))

# id influential observations (Cook's distance, cutoff 4/N, N = 32)
plot(influence(exp1b.1, "subj"), which = "cook",
     cutoff = 4/32, sort = TRUE,
     xlab = "Cook´s Distance",
     ylab = "Subject ID")

# one influential observation (S5_25), exclude and refit

exp1b.1.cooks <- lmer(data = exp4.study1.avg %>% filter(subj != "S5_25"),
                      formula = loglook ~ type + (1|subj))

# Fixed-effect table and 95% CIs (rows 3:4 = the fixed effects).
exp1b.1.table <- gen.m(exp1b.1.cooks)
exp1b.1.ci <- gen.ci(exp1b.1.cooks)[3:4,]

# Standardized coefficients from a scaled-outcome refit.
exp1b.1.beta <- lmer(data = exp4.study1.avg %>% filter(subj != "S5_25"),
                     formula = scale(loglook) ~ type + (1|subj))
exp1b.1.betas <- gen.beta(exp1b.1.beta)

exp1b.results <- cbind(exp1b.1.table, exp1b.1.betas, exp1b.1.ci)

# Effect size for the type contrast.
exp1b.cohensd <- lme.dscore(exp1b.1.cooks,
                            data = exp4.study1.avg %>% filter(subj != "S5_25"),
                            type = "lme4") %>% select(d) %>% as.numeric()

Comparing older and younger infants

# Pool Experiment 1 (13-month-olds) with Experiment 4 Study 1
# (10-month-olds) and label each infant's age group relative to 12 months.
# FIX: the older-group condition was `agem > 12`, which silently mapped an
# infant of exactly 12.0 months to NA agegroup (dropping them from the
# models below); `>=` closes that gap. No agem value of exactly 12 appears
# in the reported ranges, so results should be unchanged — TODO confirm
# against the raw data.
exp1.1013 <- long.avg %>% 
  filter(exp == "Exp.1" | exp == "Exp4.Study1") %>% 
  mutate(agegroup = as.factor(case_when(agem < 12 ~ "younger",
                            agem >= 12 ~ "older")))

# Use the higher-trench event as the reference level for the type contrast.
exp1.1013$type <- relevel(exp1.1013$type, ref = "higher")

# Does the test-event preference differ by age group?
exp1.1013.1 <- lmer(loglook ~ type * agegroup + (1|subj),
               data = exp1.1013)

# id influential observations (Cook's distance, cutoff 4/N, N = 64)
plot(influence(exp1.1013.1, "subj"), which = "cook",
     cutoff = 4/64, sort = TRUE,
     xlab = "Cook´s Distance",
     ylab = "Subject ID")

# one influential observation (S5_25), exclude and refit

exp1.1013.1.cooks <- lmer(loglook ~ type * agegroup + (1|subj),
               data = exp1.1013 %>% filter(subj != "S5_25"))

exp1.1013.1.table <- gen.m(exp1.1013.1.cooks)
exp1.1013.1.ci <- gen.ci(exp1.1013.1.cooks)[3:4,]

# Standardized coefficients from the scaled-outcome refit.
exp1.1013.1.beta <- lmer(scale(loglook) ~ type * agegroup  + (1|subj),
               data = exp1.1013 %>% filter(subj != "S5_25"))
exp1.1013.1.betas <- gen.beta(exp1.1013.1.beta)

exp1.1013.results <- cbind(exp1.1013.1.table, exp1.1013.1.betas, exp1.1013.1.ci)

# Effect size: row 3 of the d table = the type x agegroup interaction.
exp1.1013.cohensd <- lme.dscore(exp1.1013.1.cooks,
           data = filter(exp1.1013, subj != "S5_25"),
           type = "lme4") %>% select(d) %>% slice(3) %>% as.numeric() 

Study 2: Minimizing danger

exp2b.test <- exp4.study2.avg %>% filter(phase == "testavg") 

exp2b.info <- info(exp2b.test)


exp2b.0 <- lmer(loglook ~ 1 + (1|subj),
               data = exp2b.test)

exp2b.1 <- lmer(loglook ~ type + (1|subj),
               data = exp2b.test)
# Model summary for the Study 2 test model (knitr output follows).
summary(exp2b.1)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: loglook ~ type + (1 | subj)
##    Data: exp2b.test
## 
## REML criterion at convergence: 116
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.9659 -0.6026  0.0678  0.5726  1.7083 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  subj     (Intercept) 0.244    0.494   
##  Residual             0.210    0.458   
## Number of obs: 60, groups:  subj, 30
## 
## Fixed effects:
##             Estimate Std. Error     df t value Pr(>|t|)    
## (Intercept)    2.997      0.123 45.005    24.4   <2e-16 ***
## typelower     -0.202      0.118 29.000    -1.7    0.099 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##           (Intr)
## typelower -0.481
# id influential observations (Cook's distance, cutoff 4/N, N = 30)
plot(influence(exp2b.1, "subj"), which = "cook",
     cutoff = 4/30, sort = TRUE,
     xlab = "Cook´s Distance",
     ylab = "Subject ID")

# 1 influential observation (03-MR); refit without it

exp2b.1.cooks <- lmer(data = exp2b.test %>% filter(subj != "03-MR"),
                      formula = loglook ~ type + (1|subj))

# Fixed-effect table and 95% CIs (rows 3:4 = the fixed effects).
exp2b.1.table <- gen.m(exp2b.1.cooks)
exp2b.1.ci <- gen.ci(exp2b.1.cooks)[3:4,]

# Standardized coefficients from a scaled-outcome refit.
exp2b.1.beta <- lmer(data = exp2b.test %>% filter(subj != "03-MR"),
                     formula = scale(loglook) ~ type + (1|subj))
exp2b.1.betas <- gen.beta(exp2b.1.beta)

exp2b.1.results <- cbind(exp2b.1.table, exp2b.1.betas, exp2b.1.ci)

# Effect size for the type contrast.
exp2b.test.cohensd <- lme.dscore(exp2b.1.cooks,
                                 data = exp2b.test %>% filter(subj != "03-MR"),
                                 type = "lme4") %>% select(d) %>% as.numeric()
# Control-phase data for Study 2.
exp2b.control <- filter(exp4.study2.avg, phase == "control")

exp2b.2 <- lmer(data = exp2b.control,
                formula = loglook ~ type + (1|subj))

# id influential observations (Cook's distance, cutoff 4/N, N = 30)
plot(influence(exp2b.2, "subj"), which = "cook",
     cutoff = 4/30, sort = TRUE,
     xlab = "Cook´s Distance",
     ylab = "Subject ID")

# one influential observation (21-MR); refit without it

exp2b.2.cooks <- lmer(data = exp2b.control %>% filter(subj != "21-MR"),
                      formula = loglook ~ type + (1|subj))

exp2b.2.table <- gen.m(exp2b.2.cooks)
exp2b.2.ci <- gen.ci(exp2b.2.cooks)[3:4,]

# Standardized coefficients from a scaled-outcome refit.
exp2b.2.beta <- lmer(data = exp2b.control %>% filter(subj != "21-MR"),
                     formula = scale(loglook) ~ type + (1|subj))
exp2b.2.betas <- gen.beta(exp2b.2.beta)

exp2b.results2 <- cbind(exp2b.2.table, exp2b.2.betas, exp2b.2.ci)

# Effect size for the type contrast.
exp2b.control.cohensd <- lme.dscore(exp2b.2.cooks,
                                    data = exp2b.control %>% filter(subj != "21-MR"),
                                    type = "lme4") %>% select(d) %>% as.numeric()
# Control vs. test in Study 2: tag each trial with its trench and model
# the cliff x phase interaction.
exp2b.control.test <- exp4.study2.avg %>%
  mutate(cliff = case_when(type %in% c("deep", "higher") ~ "deep",
                           type %in% c("shallow", "lower") ~ "shallow"))

exp2b.control.test$type <- as.factor(exp2b.control.test$type)

exp2b.3 <- lmer(data = exp2b.control.test,
                formula = loglook ~ cliff * phase + (1|subj))

# Cook's distance screen (cutoff 4/N, N = 30); the full-sample model is
# reported below.
plot(influence(exp2b.3, "subj"), which = "cook",
     cutoff = 4/30, sort = TRUE,
     xlab = "Cook´s Distance",
     ylab = "Subject ID")

# Fixed-effect table and 95% CIs (rows 3:6 = the four fixed effects).
exp2b.3.table <- gen.m(exp2b.3)
exp2b.3.ci <- gen.ci(exp2b.3)[3:6,]

# Standardized coefficients from the scaled-outcome model.
exp2b.3.beta <- lmer(data = exp2b.control.test,
                     formula = scale(loglook) ~ cliff * phase + (1|subj))
exp2b.3.betas <- gen.beta(exp2b.3.beta)

exp2b.results3 <- cbind(exp2b.3.table, exp2b.3.betas, exp2b.3.ci)

# Effect size: row 3 of the d table = the cliff x phase interaction.
exp2b.interaction.cohensd <- lme.dscore(exp2b.3,
                                        data = exp2b.control.test,
                                        type = "lme4") %>% select(d) %>% slice(3) %>% as.numeric()

Study 3: Minimizing danger (online, no shattering)

exp3b.devices <- wide %>%
  filter(exp == "Exp4.Study3") %>%
  tabyl(device)

exp3b.highchair <- wide %>%
  filter(exp == "Exp4.Study3") %>%
  tabyl(highchair)

exp3b.qualratings <- wide %>%
  filter(exp == "Exp4.Study3") %>%
  summarise(vquality = mean(video_quality, na.rm = TRUE),
            vquality.sd = sd(video_quality, na.rm = TRUE),
            aquality = mean(audio_quality, na.rm = TRUE),
            aquality.sd = sd(audio_quality, na.rm = TRUE))
# Study 3 test-phase data (averaged looking times).
exp3b.test <- exp4.study3.avg %>% filter(phase == "testavg")
# NOTE(review): this relevel runs AFTER exp3b.test was created on the line
# above, so exp3b.test keeps the default (alphabetical) reference level
# "higher" — the summary below accordingly reports a typelower coefficient.
# Confirm the relevel is intended only for data derived from
# exp4.study3.avg later on.
exp4.study3.avg$type <- relevel(exp4.study3.avg$type, ref = "lower")


# Test model: looking time by event type, per-infant random intercepts
# (summary output follows).
exp3b.1 <- lmer(data = exp3b.test, 
     formula = loglook ~ type + (1|subj))
summary(exp3b.1)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: loglook ~ type + (1 | subj)
##    Data: exp3b.test
## 
## REML criterion at convergence: 136
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.8600 -0.6556 -0.0803  0.7117  2.2508 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  subj     (Intercept) 0.0594   0.244   
##  Residual             0.2513   0.501   
## Number of obs: 80, groups:  subj, 40
## 
## Fixed effects:
##             Estimate Std. Error      df t value Pr(>|t|)    
## (Intercept)   2.6459     0.0881 75.2501   30.02   <2e-16 ***
## typelower     0.0845     0.1121 39.0000    0.75     0.46    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##           (Intr)
## typelower -0.636
# 1 influential subject found
# Cook's distance screen (cutoff 4/N, N = 40).
plot(influence(exp3b.1, "subj"), which = "cook",
     cutoff = 4/40, sort = TRUE,
     xlab = "Cook´s Distance",
     ylab = "Subject ID")

# Refit excluding the influential subject 10m_33 (summary output follows).
exp3b.1.cooks <- lmer(data = exp3b.test %>% filter(subj != "10m_33"), 
     formula = loglook ~ type + (1|subj))
summary(exp3b.1.cooks)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: loglook ~ type + (1 | subj)
##    Data: exp3b.test %>% filter(subj != "10m_33")
## 
## REML criterion at convergence: 126
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.0703 -0.6539 -0.0543  0.7160  1.7391 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  subj     (Intercept) 0.0868   0.295   
##  Residual             0.2041   0.452   
## Number of obs: 78, groups:  subj, 39
## 
## Fixed effects:
##             Estimate Std. Error      df t value Pr(>|t|)    
## (Intercept)   2.6154     0.0864 69.7853   30.28   <2e-16 ***
## typelower     0.1357     0.1023 38.0000    1.33     0.19    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##           (Intr)
## typelower -0.592
# Fixed-effect table and 95% CIs from the refit excluding 10m_33.
exp3b.1.table <- gen.m(exp3b.1.cooks)
exp3b.1.ci <- gen.ci(exp3b.1.cooks)[3:4,]

# Standardized coefficients from a scaled-outcome refit.
exp3b.1.beta <- lmer(data = exp3b.test %>% filter(subj != "10m_33"),
                     formula = scale(loglook) ~ type + (1|subj))
exp3b.1.betas <- gen.beta(exp3b.1.beta)

exp3b.1.results <- cbind(exp3b.1.table, exp3b.1.betas, exp3b.1.ci)

# Effect size for the type contrast.
exp3b.test.cohensd <- lme.dscore(exp3b.1.cooks,
                                 data = exp3b.test %>% filter(subj != "10m_33"),
                                 type = "lme4") %>% select(d) %>% as.numeric()
# Tag each Study 3 trial with its trench (deep vs shallow).
exp3b.control.test <- exp4.study3.avg %>%
  mutate(cliff = case_when(type == "deep" | type == "higher" ~ "deep",
                           type == "shallow" | type == "lower" ~ "shallow"))
exp3b.control.test$cliff <- as.factor(exp3b.control.test$cliff)

# Cliff x phase interaction model (summary output follows).
exp3b.2 <- lmer(data = exp3b.control.test, 
     formula = loglook ~ cliff * phase + (1|subj))
summary(exp3b.2)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: loglook ~ cliff * phase + (1 | subj)
##    Data: exp3b.control.test
## 
## REML criterion at convergence: 263
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.8666 -0.7086 -0.0829  0.6966  2.3647 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  subj     (Intercept) 0.049    0.221   
##  Residual             0.267    0.517   
## Number of obs: 154, groups:  subj, 40
## 
## Fixed effects:
##                           Estimate Std. Error       df t value Pr(>|t|)    
## (Intercept)                 2.3423     0.0922 142.0665   25.39   <2e-16 ***
## cliffshallow                0.0377     0.1203 112.0503    0.31    0.754    
## phasetestavg                0.3036     0.1181 112.8277    2.57    0.011 *  
## cliffshallow:phasetestavg   0.0468     0.1668 111.5145    0.28    0.780    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) clffsh phstst
## cliffshallw -0.652              
## phasetestvg -0.668  0.509       
## clffshllw:p  0.470 -0.721 -0.706
# no influential subjects detected
# Cook's distance screen (cutoff 4/N, N = 40); the full-sample model is
# reported below.
plot(influence(exp3b.2, "subj"), which = "cook",
     cutoff = 4/40, sort = TRUE,
     xlab = "Cook´s Distance",
     ylab = "Subject ID")

# Fixed-effect table and 95% CIs (rows 3:6 = the four fixed effects).
exp3b.2.table <- gen.m(exp3b.2)
exp3b.2.ci <- gen.ci(exp3b.2)[3:6,]

# Standardized coefficients from the scaled-outcome model.
exp3b.2.beta <- lmer(data = exp3b.control.test,
                     formula = scale(loglook) ~ cliff * phase + (1|subj))
exp3b.2.betas <- gen.beta(exp3b.2.beta)

exp3b.2.results <- cbind(exp3b.2.table, exp3b.2.betas, exp3b.2.ci)

# Effect size: row 3 of the d table = the cliff x phase interaction.
exp3b.interaction.cohensd <- lme.dscore(exp3b.2,
                                        data = exp3b.control.test,
                                        type = "lme4") %>% select(d) %>% slice(3) %>% as.numeric()
# Control-phase events only; cliff is simply the event type here.
exp3b.control <- exp4.study3.avg %>%
  filter(phase == "control") %>%
  mutate(cliff = type)

# Control model: looking time by trench (summary output follows).
exp3b.3 <- lmer(data = exp3b.control, 
     formula = loglook ~ cliff + (1|subj))
summary(exp3b.3)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: loglook ~ cliff + (1 | subj)
##    Data: exp3b.control
## 
## REML criterion at convergence: 129
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.8134 -0.6761 -0.0894  0.7408  2.0842 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  subj     (Intercept) 0.0495   0.223   
##  Residual             0.2718   0.521   
## Number of obs: 74, groups:  subj, 38
## 
## Fixed effects:
##              Estimate Std. Error      df t value Pr(>|t|)    
## (Intercept)    2.3493     0.0932 70.5281   25.22   <2e-16 ***
## cliffshallow   0.0282     0.1215 36.1603    0.23     0.82    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr)
## cliffshallw -0.652
# Cook's distance screen for the control model (cutoff 4/N, N = 40).
plot(influence(exp3b.3, "subj"), which = "cook",
     cutoff = 4/40, sort = TRUE,
     xlab = "Cook´s Distance",
     ylab = "Subject ID")

# Fixed-effect table and 95% CIs (rows 3:4 = the fixed effects).
exp3b.3.table <- gen.m(exp3b.3)
exp3b.3.ci <- gen.ci(exp3b.3)[3:4,]

# Standardized coefficients from the scaled-outcome model.
exp3b.3.beta <- lmer(data = exp3b.control,
     formula = scale(loglook) ~ cliff + (1|subj))
exp3b.3.betas <- gen.beta(exp3b.3.beta)

exp3b.3.results <- cbind(exp3b.3.table, exp3b.3.betas, exp3b.3.ci)

# Effect size for the cliff contrast.
# FIX: pass the data the model was fit on (exp3b.control); the original
# passed exp3b.control.test (control + test trials), inconsistent with
# every other lme.dscore call in this script, which always passes the
# fitting data.
exp3b.control.cohensd <- lme.dscore(exp3b.3,
           data = exp3b.control,
           type = "lme4") %>% 
  select(d) %>% as.numeric() 

Comparing Studies 2-3 and Experiments 2-3

# Stack Experiments 2-3 and Studies 2-3 into one data frame and tag each
# trial with its trench (deep vs shallow).
exp23.1013.avg <- rbind(exp3.avg, exp2.avg, exp4.study2.avg, exp4.study3.avg) %>%
  mutate(cliff = case_when(type %in% c("deep", "higher") ~ "deep",
                           type %in% c("shallow", "lower") ~ "shallow"))
# Pooled model across all four datasets: cliff x phase x agegroup, with
# random intercepts for infant and for experiment.
pooling.exp23 <- lmer(loglook ~ cliff * phase * agegroup + (1|subj) + (1|exp), data = exp23.1013.avg)

# Model summary (knitr output follows).
summary(pooling.exp23)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: loglook ~ cliff * phase * agegroup + (1 | subj) + (1 | exp)
##    Data: exp23.1013.avg
## 
## REML criterion at convergence: 973
## 
## Scaled residuals: 
##    Min     1Q Median     3Q    Max 
## -3.697 -0.642 -0.032  0.623  2.376 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  subj     (Intercept) 0.06820  0.2612  
##  exp      (Intercept) 0.00429  0.0655  
##  Residual             0.28228  0.5313  
## Number of obs: 546, groups:  subj, 142; exp, 4
## 
## Fixed effects:
##                                            Estimate Std. Error        df
## (Intercept)                                 2.36691    0.08763   6.64020
## cliffshallow                                0.16374    0.09404 399.12203
## phasetestavg                                0.67389    0.09215 408.33650
## agegroupyounger                            -0.00772    0.12270   6.38565
## cliffshallow:phasetestavg                  -0.42765    0.12917 398.14107
## cliffshallow:agegroupyounger               -0.08539    0.13148 399.09339
## phasetestavg:agegroupyounger               -0.23143    0.12948 404.54420
## cliffshallow:phasetestavg:agegroupyounger   0.31119    0.18219 398.10690
##                                           t value Pr(>|t|)    
## (Intercept)                                 27.01  4.9e-08 ***
## cliffshallow                                 1.74    0.082 .  
## phasetestavg                                 7.31  1.4e-12 ***
## agegroupyounger                             -0.06    0.952    
## cliffshallow:phasetestavg                   -3.31    0.001 ** 
## cliffshallow:agegroupyounger                -0.65    0.516    
## phasetestavg:agegroupyounger                -1.79    0.075 .  
## cliffshallow:phasetestavg:agegroupyounger    1.71    0.088 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) clffsh phstst aggrpy clffshllw:p clffshllw:g phsts:
## cliffshallw -0.546                                                    
## phasetestvg -0.565  0.519                                             
## agegropyngr -0.714  0.390  0.404                                      
## clffshllw:p  0.398 -0.728 -0.707 -0.284                               
## clffshllw:g  0.391 -0.715 -0.371 -0.541  0.521                        
## phststvg:gg  0.402 -0.370 -0.712 -0.554  0.503       0.512            
## clffshllw:: -0.282  0.516  0.502  0.390 -0.709      -0.722      -0.707
# Visualize all fixed-effect combinations of the pooled model.
plot(allEffects(pooling.exp23))

# Estimated marginal means and Tukey-adjusted pairwise contrasts for every
# cliff x phase x agegroup cell (knitr output follows).
lsmeans(pooling.exp23, pairwise ~ cliff * phase * agegroup)
## $lsmeans
##  cliff   phase   agegroup lsmean    SE  df lower.CL upper.CL
##  deep    control older      2.37 0.088 6.6     2.16      2.6
##  shallow control older      2.53 0.087 6.4     2.32      2.7
##  deep    testavg older      3.04 0.084 5.6     2.83      3.3
##  shallow testavg older      2.78 0.084 5.6     2.57      3.0
##  deep    control younger    2.36 0.086 6.1     2.15      2.6
##  shallow control younger    2.44 0.086 6.1     2.23      2.6
##  deep    testavg younger    2.80 0.085 5.8     2.59      3.0
##  shallow testavg younger    2.76 0.085 5.8     2.55      3.0
## 
## Degrees-of-freedom method: kenward-roger 
## Confidence level used: 0.95 
## 
## $contrasts
##  contrast                                          estimate    SE  df t.ratio
##  deep control older - shallow control older           -0.16 0.094 401 -1.700 
##  deep control older - deep testavg older              -0.67 0.092 410 -7.300 
##  deep control older - shallow testavg older           -0.41 0.092 410 -4.400 
##  deep control older - deep control younger             0.01 0.123   6  0.100 
##  deep control older - shallow control younger         -0.07 0.123   6 -0.600 
##  deep control older - deep testavg younger            -0.43 0.122   6 -3.600 
##  deep control older - shallow testavg younger         -0.40 0.122   6 -3.200 
##  shallow control older - deep testavg older           -0.51 0.091 408 -5.600 
##  shallow control older - shallow testavg older        -0.25 0.091 408 -2.700 
##  shallow control older - deep control younger          0.17 0.122   6  1.400 
##  shallow control older - shallow control younger       0.09 0.122   6  0.800 
##  shallow control older - deep testavg younger         -0.27 0.121   6 -2.200 
##  shallow control older - shallow testavg younger      -0.23 0.121   6 -1.900 
##  deep testavg older - shallow testavg older            0.26 0.089 399  3.000 
##  deep testavg older - deep control younger             0.68 0.120   6  5.700 
##  deep testavg older - shallow control younger          0.60 0.120   6  5.000 
##  deep testavg older - deep testavg younger             0.24 0.120   6  2.000 
##  deep testavg older - shallow testavg younger          0.28 0.120   6  2.300 
##  shallow testavg older - deep control younger          0.42 0.120   6  3.500 
##  shallow testavg older - shallow control younger       0.34 0.120   6  2.800 
##  shallow testavg older - deep testavg younger         -0.02 0.120   6 -0.200 
##  shallow testavg older - shallow testavg younger       0.01 0.120   6  0.100 
##  deep control younger - shallow control younger       -0.08 0.092 401 -0.900 
##  deep control younger - deep testavg younger          -0.44 0.091 403 -4.900 
##  deep control younger - shallow testavg younger       -0.40 0.091 403 -4.400 
##  shallow control younger - deep testavg younger       -0.36 0.091 403 -4.000 
##  shallow control younger - shallow testavg younger    -0.33 0.091 403 -3.600 
##  deep testavg younger - shallow testavg younger        0.04 0.090 399  0.400 
##  p.value
##  0.6600 
##  <.0001 
##  <.0001 
##  1.0000 
##  1.0000 
##  0.1100 
##  0.1500 
##  <.0001 
##  0.1300 
##  0.8300 
##  0.9900 
##  0.4400 
##  0.5800 
##  0.0600 
##  0.0100 
##  0.0300 
##  0.5400 
##  0.4100 
##  0.1200 
##  0.2400 
##  1.0000 
##  1.0000 
##  0.9900 
##  <.0001 
##  <.0001 
##  <.0001 
##  0.0100 
##  1.0000 
## 
## Degrees-of-freedom method: kenward-roger 
## P value adjustment: tukey method for comparing a family of 8 estimates
# Cook's distance screen for the pooled model (cutoff 4/N, N = 142).
plot(influence(pooling.exp23, "subj"), which = "cook",
     cutoff = 4/142, sort = TRUE,
     xlab = "Cook´s Distance",
     ylab = "Subject ID")

# Refit excluding influential subject "28" (summary output follows).
pooling.exp23.cooks <- lmer(loglook ~ cliff * phase * agegroup + (1|subj) + (1|exp), data = exp23.1013.avg %>% filter(subj != "28"))
summary(pooling.exp23.cooks)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: loglook ~ cliff * phase * agegroup + (1 | subj) + (1 | exp)
##    Data: exp23.1013.avg %>% filter(subj != "28")
## 
## REML criterion at convergence: 952
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.9448 -0.6493 -0.0278  0.6330  2.4232 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  subj     (Intercept) 0.07202  0.2684  
##  exp      (Intercept) 0.00405  0.0636  
##  Residual             0.27168  0.5212  
## Number of obs: 542, groups:  subj, 141; exp, 4
## 
## Fixed effects:
##                                            Estimate Std. Error        df
## (Intercept)                                 2.35593    0.08675   6.69366
## cliffshallow                                0.20654    0.09299 396.05008
## phasetestavg                                0.67616    0.09112 405.16215
## agegroupyounger                             0.00271    0.12115   6.36409
## cliffshallow:phasetestavg                  -0.46520    0.12767 395.09468
## cliffshallow:agegroupyounger               -0.12780    0.12952 396.00722
## phasetestavg:agegroupyounger               -0.23331    0.12755 401.42519
## cliffshallow:phasetestavg:agegroupyounger   0.34835    0.17942 395.05325
##                                           t value Pr(>|t|)    
## (Intercept)                                 27.16  4.2e-08 ***
## cliffshallow                                 2.22   0.0269 *  
## phasetestavg                                 7.42  6.9e-13 ***
## agegroupyounger                              0.02   0.9829    
## cliffshallow:phasetestavg                   -3.64   0.0003 ***
## cliffshallow:agegroupyounger                -0.99   0.3244    
## phasetestavg:agegroupyounger                -1.83   0.0681 .  
## cliffshallow:phasetestavg:agegroupyounger    1.94   0.0529 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) clffsh phstst aggrpy clffshllw:p clffshllw:g phsts:
## cliffshallw -0.546                                                    
## phasetestvg -0.565  0.519                                             
## agegropyngr -0.716  0.391  0.405                                      
## clffshllw:p  0.398 -0.728 -0.707 -0.285                               
## clffshllw:g  0.392 -0.718 -0.373 -0.540  0.523                        
## phststvg:gg  0.404 -0.371 -0.714 -0.553  0.505       0.512            
## clffshllw:: -0.283  0.518  0.503  0.390 -0.712      -0.722      -0.707
# Fixed-effect table and 95% CIs (rows 3:10 = the eight fixed effects)
# for the refit pooled model.
pooling.exp23.table <- gen.m(pooling.exp23.cooks)
pooling.exp23.ci <- gen.ci(pooling.exp23.cooks)[3:10,]

# NOTE(review): unlike the per-experiment analyses, no scale(loglook) refit
# is made here; gen.beta is applied to the raw-scale model — confirm
# gen.beta standardizes internally.
pooling.exp23.betas <- gen.beta(pooling.exp23.cooks)

pooling.exp23.results <- cbind(pooling.exp23.table, pooling.exp23.betas, pooling.exp23.ci)

# Effect size: row 7 of the d table = the three-way interaction.
pooling.exp23.cohensd <- lme.dscore(pooling.exp23.cooks,
                                    data = exp23.1013.avg %>% filter(subj != "28"),
                                    type = "lme4") %>% slice(7) %>% select(d) %>% as.numeric()
# Split the pooled Exps 2-3 data into test-phase and control-phase subsets.
exp23.1013.test <- exp23.1013.avg %>% filter(phase == "testavg")
exp23.1013.control <- exp23.1013.avg %>% filter(phase == "control")

# Test-phase-only model: does the cliff effect differ by age group?
pooling.exp23.test <- lmer(loglook ~ cliff * agegroup + (1|subj) + (1|exp), data = exp23.1013.test)

# Screen for influential subjects (Cook's distance, cutoff 4/N with N = 142).
plot(influence(pooling.exp23.test, "subj"), which = "cook",
     cutoff = 4/142, sort = TRUE,
     xlab = "Cook´s Distance",
     ylab = "Subject ID")

# Refit without the influential subject flagged above.
pooling.exp23.test.cooks <- lmer(loglook ~ cliff * agegroup + (1|subj) + (1|exp), data = exp23.1013.test %>% filter(subj != "10m_33"))

pooling.exp23.test.table <- gen.m(pooling.exp23.test.cooks)
pooling.exp23.test.ci <- gen.ci(pooling.exp23.test.cooks)[4:7,]
pooling.exp23.test.betas <- gen.beta(pooling.exp23.test.cooks)

pooling.exp23.test.results <- cbind(pooling.exp23.test.table, pooling.exp23.test.betas, pooling.exp23.test.ci)

# Cohen's d for the cliff x agegroup interaction (3rd fixed-effect row).
# BUG FIX: the effect-size data must match the refit model's data; the
# subject excluded above is "10m_33", not "10_33" (filtering "10_33"
# silently removed nobody, so d was computed on the wrong sample).
pooling.exp23.test.cohensd <- lme.dscore(pooling.exp23.test.cooks,
           data = exp23.1013.test %>% filter(subj != "10m_33"),
           type = "lme4") %>% slice(3) %>%
  select(d) %>% as.numeric()

# Control-phase-only model: does the cliff effect differ by age group
# when attention is drawn to the trenches outside of any choice?
pooling.exp23.control <- lmer(loglook ~ cliff * agegroup + (1|subj) + (1|exp), data = exp23.1013.control)

# Screen for influential subjects (Cook's distance, cutoff 4/N with N = 142).
plot(influence(pooling.exp23.control, "subj"), which = "cook",
     cutoff = 4/142, sort = TRUE,
     xlab = "Cook´s Distance",
     ylab = "Subject ID")

# Refit without the two influential subjects flagged above.
pooling.exp23.control.cooks <- lmer(
  loglook ~ cliff * agegroup + (1|subj) + (1|exp),
  data = exp23.1013.control %>% filter(subj != "28", subj != "26")
)

pooling.exp23.control.table <- gen.m(pooling.exp23.control.cooks)
pooling.exp23.control.betas <- gen.beta(pooling.exp23.control.cooks)
pooling.exp23.control.ci <- gen.ci(pooling.exp23.control.cooks)[4:7, ]

pooling.exp23.control.results <- cbind(
  pooling.exp23.control.table,
  pooling.exp23.control.betas,
  pooling.exp23.control.ci
)

# Cohen's d for the cliff x agegroup interaction (3rd fixed-effect row),
# computed on the same data the refit model used.
pooling.exp23.control.cohensd <- lme.dscore(
  pooling.exp23.control.cooks,
  data = exp23.1013.control %>% filter(subj != "28", subj != "26"),
  type = "lme4"
) %>%
  slice(3) %>%
  pull(d)

In Experiment 4, we investigated the developmental origins of the capacity to reason about danger by testing infants under one year of age, using the respective methods of Experiments 1-3. We will reference these samples as Experiment 4, Studies 1, 2, and 3. All 3 studies focused on 10-month-olds because of their previous success in reasoning about the physical costs of actions (e.g. in [redacted]).

Comparing Study 2 to Experiment 2

# Pool Exp 2 (13-month-olds) with Exp 4 Study 2 (10-month-olds), recoding
# trench type into a common deep/shallow "cliff" factor.
exp2.1013.avg <- rbind(exp2.avg, exp4.study2.avg) %>%
  mutate(cliff = case_when(
    type %in% c("deep", "higher") ~ "deep",
    type %in% c("shallow", "lower") ~ "shallow"
  ))

# Sanity check: number of unique infants in the pooled sample.
length(unique(exp2.1013.avg$subj))
## [1] 60
# dropped random intercept for exp to deal with issues of convergence
pooling.exp2.2b <- lmer(loglook ~ cliff * phase * agegroup + (1 | subj), data = exp2.1013.avg)

summary(pooling.exp2.2b)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: loglook ~ cliff * phase * agegroup + (1 | subj)
##    Data: exp2.1013.avg
## 
## REML criterion at convergence: 450
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.5427 -0.6684  0.0196  0.6716  2.3739 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  subj     (Intercept) 0.0931   0.305   
##  Residual             0.3016   0.549   
## Number of obs: 238, groups:  subj, 60
## 
## Fixed effects:
##                                           Estimate Std. Error       df t value
## (Intercept)                                 2.4022     0.1183 202.4665   20.31
## cliffshallow                                0.2125     0.1447 173.1253    1.47
## phasetestavg                                0.7102     0.1447 173.1253    4.91
## agegroupyounger                            -0.0266     0.1648 199.8027   -0.16
## cliffshallow:phasetestavg                  -0.4985     0.2026 172.2444   -2.46
## cliffshallow:agegroupyounger               -0.0867     0.2026 172.2444   -0.43
## phasetestavg:agegroupyounger               -0.0887     0.2026 172.2444   -0.44
## cliffshallow:phasetestavg:agegroupyounger   0.1711     0.2851 171.7904    0.60
##                                           Pr(>|t|)    
## (Intercept)                                < 2e-16 ***
## cliffshallow                                 0.144    
## phasetestavg                               2.1e-06 ***
## agegroupyounger                              0.872    
## cliffshallow:phasetestavg                    0.015 *  
## cliffshallow:agegroupyounger                 0.669    
## phasetestavg:agegroupyounger                 0.662    
## cliffshallow:phasetestavg:agegroupyounger    0.549    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) clffsh phstst aggrpy clffshllw:p clffshllw:g phsts:
## cliffshallw -0.636                                                    
## phasetestvg -0.636  0.520                                             
## agegropyngr -0.718  0.457  0.457                                      
## clffshllw:p  0.454 -0.714 -0.714 -0.326                               
## clffshllw:g  0.454 -0.714 -0.371 -0.627  0.510                        
## phststvg:gg  0.454 -0.371 -0.714 -0.627  0.510       0.510            
## clffshllw:: -0.323  0.508  0.508  0.446 -0.711      -0.711      -0.711
# Plot the fitted marginal effects for every term in the model.
plot(allEffects(pooling.exp2.2b))

# Estimated marginal means and Tukey-adjusted pairwise contrasts for the
# full cliff x phase x agegroup design (Kenward-Roger df, per output below).
lsmeans(pooling.exp2.2b, pairwise ~ cliff * phase * agegroup)
## $lsmeans
##  cliff   phase   agegroup lsmean    SE  df lower.CL upper.CL
##  deep    control older      2.40 0.118 203     2.17      2.6
##  shallow control older      2.61 0.115 197     2.39      2.8
##  deep    testavg older      3.11 0.115 197     2.89      3.3
##  shallow testavg older      2.83 0.115 197     2.60      3.1
##  deep    control younger    2.38 0.115 197     2.15      2.6
##  shallow control younger    2.50 0.115 197     2.28      2.7
##  deep    testavg younger    3.00 0.115 197     2.77      3.2
##  shallow testavg younger    2.80 0.115 197     2.57      3.0
## 
## Degrees-of-freedom method: kenward-roger 
## Confidence level used: 0.95 
## 
## $contrasts
##  contrast                                          estimate    SE  df t.ratio
##  deep control older - shallow control older           -0.21 0.145 174 -1.500 
##  deep control older - deep testavg older              -0.71 0.145 174 -4.900 
##  deep control older - shallow testavg older           -0.42 0.145 174 -2.900 
##  deep control older - deep control younger             0.03 0.165 200  0.200 
##  deep control older - shallow control younger         -0.10 0.165 200 -0.600 
##  deep control older - deep testavg younger            -0.59 0.165 200 -3.600 
##  deep control older - shallow testavg younger         -0.39 0.165 200 -2.400 
##  shallow control older - deep testavg older           -0.50 0.142 172 -3.500 
##  shallow control older - shallow testavg older        -0.21 0.142 172 -1.500 
##  shallow control older - deep control younger          0.24 0.162 197  1.500 
##  shallow control older - shallow control younger       0.11 0.162 197  0.700 
##  shallow control older - deep testavg younger         -0.38 0.162 197 -2.400 
##  shallow control older - shallow testavg younger      -0.18 0.162 197 -1.100 
##  deep testavg older - shallow testavg older            0.29 0.142 172  2.000 
##  deep testavg older - deep control younger             0.74 0.162 197  4.500 
##  deep testavg older - shallow control younger          0.61 0.162 197  3.800 
##  deep testavg older - deep testavg younger             0.12 0.162 197  0.700 
##  deep testavg older - shallow testavg younger          0.32 0.162 197  2.000 
##  shallow testavg older - deep control younger          0.45 0.162 197  2.800 
##  shallow testavg older - shallow control younger       0.32 0.162 197  2.000 
##  shallow testavg older - deep testavg younger         -0.17 0.162 197 -1.100 
##  shallow testavg older - shallow testavg younger       0.03 0.162 197  0.200 
##  deep control younger - shallow control younger       -0.13 0.142 172 -0.900 
##  deep control younger - deep testavg younger          -0.62 0.142 172 -4.400 
##  deep control younger - shallow testavg younger       -0.42 0.142 172 -3.000 
##  shallow control younger - deep testavg younger       -0.50 0.142 172 -3.500 
##  shallow control younger - shallow testavg younger    -0.29 0.142 172 -2.100 
##  deep testavg younger - shallow testavg younger        0.20 0.142 172  1.400 
##  p.value
##  0.8200 
##  <.0001 
##  0.0700 
##  1.0000 
##  1.0000 
##  0.0100 
##  0.2500 
##  0.0100 
##  0.8100 
##  0.8200 
##  1.0000 
##  0.2700 
##  0.9500 
##  0.4700 
##  <.0001 
##  0.0100 
##  1.0000 
##  0.5200 
##  0.1100 
##  0.4800 
##  0.9700 
##  1.0000 
##  0.9900 
##  <.0001 
##  0.0700 
##  0.0100 
##  0.4400 
##  0.8500 
## 
## Degrees-of-freedom method: kenward-roger 
## P value adjustment: tukey method for comparing a family of 8 estimates
# Screen for influential subjects (Cook's distance, cutoff 4/N with N = 60).
plot(influence(pooling.exp2.2b, "subj"), which = "cook",
     cutoff = 4/60, sort = TRUE,
     xlab = "Cook´s Distance",
     ylab = "Subject ID")

# no influential observations

# Coefficient table, standardized betas, and CIs for the full model.
pooling.exp2.2b.table <- gen.m(pooling.exp2.2b)
pooling.exp2.2b.ci <- gen.ci(pooling.exp2.2b)[3:10,]

pooling.exp2.2b.betas <- gen.beta(pooling.exp2.2b)

pooling.exp2.2b.results <- cbind(pooling.exp2.2b.table, pooling.exp2.2b.betas, pooling.exp2.2b.ci)

# Cohen's d for the 3-way interaction (7th fixed-effect row).
# BUG FIX: pooling.exp2.2b was fit on exp2.1013.avg (Exp 2 + Study 2);
# the effect-size data must be that same frame, not the Exps 2-3 pooled
# data (exp23.1013.avg) that was passed here by mistake.
pooling.exp2.2b.cohensd <- lme.dscore(pooling.exp2.2b,
           data = exp2.1013.avg,
           type = "lme4") %>% slice(7) %>%
  select(d) %>% as.numeric()

Comparing Study 3 to Experiment 3

# Pool Exp 3 (13-month-olds) with Exp 4 Study 3 (10-month-olds), recoding
# trench type into a common deep/shallow "cliff" factor.
exp3.3b.1013.avg <- rbind(exp3.avg, exp4.study3.avg) %>%
  mutate(cliff = case_when(
    type %in% c("deep", "higher") ~ "deep",
    type %in% c("shallow", "lower") ~ "shallow"
  ))

# Sanity check: number of unique infants in the pooled sample.
length(unique(exp3.3b.1013.avg$subj))
## [1] 82
# dropped random intercept for exp to deal with issues of convergence
pooling.exp3.3b <- lmer(loglook ~ cliff * phase * agegroup + (1 | subj), data = exp3.3b.1013.avg)

summary(pooling.exp3.3b)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: loglook ~ cliff * phase * agegroup + (1 | subj)
##    Data: exp3.3b.1013.avg
## 
## REML criterion at convergence: 529
## 
## Scaled residuals: 
##    Min     1Q Median     3Q    Max 
## -3.724 -0.631 -0.031  0.633  2.473 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  subj     (Intercept) 0.0478   0.219   
##  Residual             0.2705   0.520   
## Number of obs: 308, groups:  subj, 82
## 
## Fixed effects:
##                                            Estimate Std. Error        df
## (Intercept)                                 2.33387    0.09506 288.74413
## cliffshallow                                0.12106    0.12434 219.43615
## phasetestavg                                0.64760    0.11975 229.14048
## agegroupyounger                             0.00869    0.13272 287.04959
## cliffshallow:phasetestavg                  -0.36915    0.16835 219.43615
## cliffshallow:agegroupyounger               -0.08359    0.17358 220.55054
## phasetestavg:agegroupyounger               -0.34428    0.16877 226.24243
## cliffshallow:phasetestavg:agegroupyounger   0.41621    0.23778 220.02991
##                                           t value Pr(>|t|)    
## (Intercept)                                 24.55  < 2e-16 ***
## cliffshallow                                 0.97    0.331    
## phasetestavg                                 5.41  1.6e-07 ***
## agegroupyounger                              0.07    0.948    
## cliffshallow:phasetestavg                   -2.19    0.029 *  
## cliffshallow:agegroupyounger                -0.48    0.631    
## phasetestavg:agegroupyounger                -2.04    0.043 *  
## cliffshallow:phasetestavg:agegroupyounger    1.75    0.081 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) clffsh phstst aggrpy clffshllw:p clffshllw:g phsts:
## cliffshallw -0.654                                                    
## phasetestvg -0.694  0.519                                             
## agegropyngr -0.716  0.468  0.497                                      
## clffshllw:p  0.483 -0.739 -0.703 -0.346                               
## clffshllw:g  0.468 -0.716 -0.372 -0.654  0.529                        
## phststvg:gg  0.492 -0.368 -0.710 -0.682  0.499       0.514            
## clffshllw:: -0.342  0.523  0.498  0.477 -0.708      -0.730      -0.704
# Plot the fitted marginal effects for every term in the model.
plot(allEffects(pooling.exp3.3b))

# Estimated marginal means and Tukey-adjusted pairwise contrasts for the
# full cliff x phase x agegroup design (Kenward-Roger df, per output below).
lsmeans(pooling.exp3.3b, pairwise ~ cliff * phase * agegroup)
## $lsmeans
##  cliff   phase   agegroup lsmean    SE  df lower.CL upper.CL
##  deep    control older      2.33 0.095 289     2.15     2.52
##  shallow control older      2.45 0.095 289     2.27     2.64
##  deep    testavg older      2.98 0.087 282     2.81     3.15
##  shallow testavg older      2.73 0.087 282     2.56     2.90
##  deep    control younger    2.34 0.093 286     2.16     2.52
##  shallow control younger    2.38 0.093 286     2.20     2.56
##  deep    testavg younger    2.65 0.089 282     2.47     2.82
##  shallow testavg younger    2.73 0.089 282     2.55     2.91
## 
## Degrees-of-freedom method: kenward-roger 
## Confidence level used: 0.95 
## 
## $contrasts
##  contrast                                          estimate    SE  df t.ratio
##  deep control older - shallow control older           -0.12 0.124 221 -1.000 
##  deep control older - deep testavg older              -0.65 0.120 231 -5.400 
##  deep control older - shallow testavg older           -0.40 0.120 231 -3.300 
##  deep control older - deep control younger            -0.01 0.133 287 -0.100 
##  deep control older - shallow control younger         -0.05 0.133 287 -0.300 
##  deep control older - deep testavg younger            -0.31 0.130 286 -2.400 
##  deep control older - shallow testavg younger         -0.40 0.130 286 -3.000 
##  shallow control older - deep testavg older           -0.53 0.120 231 -4.400 
##  shallow control older - shallow testavg older        -0.28 0.120 231 -2.300 
##  shallow control older - deep control younger          0.11 0.133 287  0.800 
##  shallow control older - shallow control younger       0.07 0.133 287  0.600 
##  shallow control older - deep testavg younger         -0.19 0.130 286 -1.500 
##  shallow control older - shallow testavg younger      -0.28 0.130 286 -2.100 
##  deep testavg older - shallow testavg older            0.25 0.114 221  2.200 
##  deep testavg older - deep control younger             0.64 0.127 284  5.000 
##  deep testavg older - shallow control younger          0.60 0.127 284  4.700 
##  deep testavg older - deep testavg younger             0.34 0.125 282  2.700 
##  deep testavg older - shallow testavg younger          0.25 0.125 282  2.000 
##  shallow testavg older - deep control younger          0.39 0.127 284  3.100 
##  shallow testavg older - shallow control younger       0.35 0.127 284  2.800 
##  shallow testavg older - deep testavg younger          0.09 0.125 282  0.700 
##  shallow testavg older - shallow testavg younger       0.00 0.125 282  0.000 
##  deep control younger - shallow control younger       -0.04 0.121 223 -0.300 
##  deep control younger - deep testavg younger          -0.30 0.119 225 -2.500 
##  deep control younger - shallow testavg younger       -0.39 0.119 225 -3.300 
##  shallow control younger - deep testavg younger       -0.27 0.119 225 -2.200 
##  shallow control younger - shallow testavg younger    -0.35 0.119 225 -2.900 
##  deep testavg younger - shallow testavg younger       -0.08 0.116 221 -0.700 
##  p.value
##  0.9800 
##  <.0001 
##  0.0200 
##  1.0000 
##  1.0000 
##  0.2500 
##  0.0500 
##  <.0001 
##  0.2900 
##  0.9900 
##  1.0000 
##  0.8300 
##  0.4100 
##  0.3600 
##  <.0001 
##  <.0001 
##  0.1300 
##  0.4700 
##  0.0500 
##  0.1000 
##  1.0000 
##  1.0000 
##  1.0000 
##  0.1800 
##  0.0300 
##  0.3400 
##  0.0700 
##  1.0000 
## 
## Degrees-of-freedom method: kenward-roger 
## P value adjustment: tukey method for comparing a family of 8 estimates
# Screen for influential subjects (Cook's distance, cutoff 4/N with N = 82).
plot(influence(pooling.exp3.3b, "subj"), which = "cook",
     cutoff = 4/82, sort = TRUE,
     xlab = "Cook´s Distance",
     ylab = "Subject ID")

# 2 influential observations

# Refit without the two influential subjects flagged above.
pooling.exp3.3b.cooks <- lmer(
  loglook ~ cliff * phase * agegroup + (1 | subj),
  data = exp3.3b.1013.avg %>% filter(subj != "28", subj != "26")
)
summary(pooling.exp3.3b.cooks)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: loglook ~ cliff * phase * agegroup + (1 | subj)
##    Data: exp3.3b.1013.avg %>% filter(subj != "28", subj != "26")
## 
## REML criterion at convergence: 488
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.9480 -0.6389 -0.0603  0.6127  2.4682 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  subj     (Intercept) 0.0459   0.214   
##  Residual             0.2451   0.495   
## Number of obs: 300, groups:  subj, 80
## 
## Fixed effects:
##                                           Estimate Std. Error       df t value
## (Intercept)                                 2.3742     0.0936 280.2098   25.38
## cliffshallow                                0.1724     0.1219 212.7961    1.41
## phasetestavg                                0.6033     0.1172 222.7382    5.15
## agegroupyounger                            -0.0320     0.1288 278.2891   -0.25
## cliffshallow:phasetestavg                  -0.4189     0.1647 212.7961   -2.54
## cliffshallow:agegroupyounger               -0.1345     0.1678 213.8365   -0.80
## phasetestavg:agegroupyounger               -0.2997     0.1629 219.7481   -1.84
## cliffshallow:phasetestavg:agegroupyounger   0.4655     0.2295 213.3521    2.03
##                                           Pr(>|t|)    
## (Intercept)                                < 2e-16 ***
## cliffshallow                                 0.159    
## phasetestavg                               5.8e-07 ***
## agegroupyounger                              0.804    
## cliffshallow:phasetestavg                    0.012 *  
## cliffshallow:agegroupyounger                 0.424    
## phasetestavg:agegroupyounger                 0.067 .  
## cliffshallow:phasetestavg:agegroupyounger    0.044 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) clffsh phstst aggrpy clffshllw:p clffshllw:g phsts:
## cliffshallw -0.651                                                    
## phasetestvg -0.694  0.520                                             
## agegropyngr -0.726  0.473  0.504                                      
## clffshllw:p  0.482 -0.740 -0.703 -0.350                               
## clffshllw:g  0.473 -0.726 -0.378 -0.651  0.538                        
## phststvg:gg  0.499 -0.374 -0.719 -0.681  0.505       0.515            
## clffshllw:: -0.346  0.531  0.504  0.476 -0.718      -0.731      -0.704
# Coefficient table, standardized betas, and CIs for the refit model.
pooling.exp3.3b.table <- gen.m(pooling.exp3.3b.cooks)
pooling.exp3.3b.betas <- gen.beta(pooling.exp3.3b.cooks)
pooling.exp3.3b.ci <- gen.ci(pooling.exp3.3b.cooks)[3:10, ]

pooling.exp3.3b.results <- cbind(
  pooling.exp3.3b.table,
  pooling.exp3.3b.betas,
  pooling.exp3.3b.ci
)

# Cohen's d for the 3-way interaction (7th fixed-effect row),
# computed on the same data the refit model used.
pooling.exp3.3b.cohensd <- lme.dscore(
  pooling.exp3.3b.cooks,
  data = exp3.3b.1013.avg %>% filter(subj != "28", subj != "26"),
  type = "lme4"
) %>%
  slice(7) %>%
  pull(d)

Comparing Study 2 to Study 3

# Pool the two 10-month-old samples (Exp 4 Studies 2 and 3), recoding
# trench type into a common deep/shallow "cliff" factor.
exp4.studies23.avg <- rbind(exp4.study2.avg, exp4.study3.avg) %>%
  mutate(cliff = case_when(
    type %in% c("deep", "higher") ~ "deep",
    type %in% c("shallow", "lower") ~ "shallow"
  ))

# Sanity check: number of unique infants in the pooled sample.
length(unique(exp4.studies23.avg$subj))
## [1] 70
# dropped random intercept for exp to deal with issues of convergence
pooling.exp3b.2b <- lmer(loglook ~ cliff * phase * exp + (1 | subj), data = exp4.studies23.avg)

summary(pooling.exp3b.2b)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: loglook ~ cliff * phase * exp + (1 | subj)
##    Data: exp4.studies23.avg
## 
## REML criterion at convergence: 497
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.5588 -0.6960 -0.0468  0.6774  2.4252 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  subj     (Intercept) 0.0684   0.261   
##  Residual             0.2885   0.537   
## Number of obs: 274, groups:  subj, 70
## 
## Fixed effects:
##                                          Estimate Std. Error       df t value
## (Intercept)                                2.3756     0.1091 240.0582   21.78
## cliffshallow                               0.1258     0.1387 198.2460    0.91
## phasetestavg                               0.6214     0.1387 198.2460    4.48
## expExp4.Study3                            -0.0350     0.1466 242.2726   -0.24
## cliffshallow:phasetestavg                 -0.3274     0.1961 198.2460   -1.67
## cliffshallow:expExp4.Study3               -0.0862     0.1868 199.0576   -0.46
## phasetestavg:expExp4.Study3               -0.3161     0.1853 199.6380   -1.71
## cliffshallow:phasetestavg:expExp4.Study3   0.3724     0.2618 198.6591    1.42
##                                          Pr(>|t|)    
## (Intercept)                               < 2e-16 ***
## cliffshallow                                0.365    
## phasetestavg                              1.3e-05 ***
## expExp4.Study3                              0.811    
## cliffshallow:phasetestavg                   0.097 .  
## cliffshallow:expExp4.Study3                 0.645    
## phasetestavg:expExp4.Study3                 0.090 .  
## cliffshallow:phasetestavg:expExp4.Study3    0.157    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) clffsh phstst eE4.S3 clffs: c:E4.S p:E4.S
## cliffshallw -0.636                                          
## phasetestvg -0.636  0.500                                   
## expExp4.St3 -0.744  0.473  0.473                            
## clffshllw:p  0.450 -0.707 -0.707 -0.334                     
## clffs:E4.S3  0.472 -0.743 -0.371 -0.637  0.525              
## phsts:E4.S3  0.476 -0.374 -0.748 -0.644  0.529  0.504       
## clff::E4.S3 -0.337  0.530  0.530  0.454 -0.749 -0.713 -0.706
# Plot the fitted marginal effects for every term in the model.
plot(allEffects(pooling.exp3b.2b))

# Estimated marginal means and Tukey-adjusted pairwise contrasts for the
# full cliff x phase x study design (Kenward-Roger df, per output below).
lsmeans(pooling.exp3b.2b, pairwise ~ cliff * phase * exp)
## $lsmeans
##  cliff   phase   exp         lsmean    SE  df lower.CL upper.CL
##  deep    control Exp4.Study2   2.38 0.109 240     2.16      2.6
##  shallow control Exp4.Study2   2.50 0.109 240     2.29      2.7
##  deep    testavg Exp4.Study2   3.00 0.109 240     2.78      3.2
##  shallow testavg Exp4.Study2   2.80 0.109 240     2.58      3.0
##  deep    control Exp4.Study3   2.34 0.098 245     2.15      2.5
##  shallow control Exp4.Study3   2.38 0.098 245     2.19      2.6
##  deep    testavg Exp4.Study3   2.65 0.094 240     2.46      2.8
##  shallow testavg Exp4.Study3   2.73 0.094 240     2.54      2.9
## 
## Degrees-of-freedom method: kenward-roger 
## Confidence level used: 0.95 
## 
## $contrasts
##  contrast                                                  estimate    SE  df
##  deep control Exp4.Study2 - shallow control Exp4.Study2       -0.13 0.139 198
##  deep control Exp4.Study2 - deep testavg Exp4.Study2          -0.62 0.139 198
##  deep control Exp4.Study2 - shallow testavg Exp4.Study2       -0.42 0.139 198
##  deep control Exp4.Study2 - deep control Exp4.Study3           0.04 0.147 242
##  deep control Exp4.Study2 - shallow control Exp4.Study3        0.00 0.147 242
##  deep control Exp4.Study2 - deep testavg Exp4.Study3          -0.27 0.144 240
##  deep control Exp4.Study2 - shallow testavg Exp4.Study3       -0.35 0.144 240
##  shallow control Exp4.Study2 - deep testavg Exp4.Study2       -0.50 0.139 198
##  shallow control Exp4.Study2 - shallow testavg Exp4.Study2    -0.29 0.139 198
##  shallow control Exp4.Study2 - deep control Exp4.Study3        0.16 0.147 242
##  shallow control Exp4.Study2 - shallow control Exp4.Study3     0.12 0.147 242
##  shallow control Exp4.Study2 - deep testavg Exp4.Study3       -0.14 0.144 240
##  shallow control Exp4.Study2 - shallow testavg Exp4.Study3    -0.23 0.144 240
##  deep testavg Exp4.Study2 - shallow testavg Exp4.Study2        0.20 0.139 198
##  deep testavg Exp4.Study2 - deep control Exp4.Study3           0.66 0.147 242
##  deep testavg Exp4.Study2 - shallow control Exp4.Study3        0.62 0.147 242
##  deep testavg Exp4.Study2 - deep testavg Exp4.Study3           0.35 0.144 240
##  deep testavg Exp4.Study2 - shallow testavg Exp4.Study3        0.27 0.144 240
##  shallow testavg Exp4.Study2 - deep control Exp4.Study3        0.45 0.147 242
##  shallow testavg Exp4.Study2 - shallow control Exp4.Study3     0.42 0.147 242
##  shallow testavg Exp4.Study2 - deep testavg Exp4.Study3        0.15 0.144 240
##  shallow testavg Exp4.Study2 - shallow testavg Exp4.Study3     0.06 0.144 240
##  deep control Exp4.Study3 - shallow control Exp4.Study3       -0.04 0.125 200
##  deep control Exp4.Study3 - deep testavg Exp4.Study3          -0.31 0.123 201
##  deep control Exp4.Study3 - shallow testavg Exp4.Study3       -0.39 0.123 201
##  shallow control Exp4.Study3 - deep testavg Exp4.Study3       -0.27 0.123 201
##  shallow control Exp4.Study3 - shallow testavg Exp4.Study3    -0.35 0.123 201
##  deep testavg Exp4.Study3 - shallow testavg Exp4.Study3       -0.08 0.120 198
##  t.ratio p.value
##  -0.900  0.9900 
##  -4.500  <.0001 
##  -3.000  0.0600 
##   0.200  1.0000 
##   0.000  1.0000 
##  -1.900  0.5700 
##  -2.500  0.2200 
##  -3.600  0.0100 
##  -2.100  0.4100 
##   1.100  0.9600 
##   0.800  0.9900 
##  -1.000  0.9700 
##  -1.600  0.7600 
##   1.500  0.8300 
##   4.500  <.0001 
##   4.200  <.0001 
##   2.400  0.2300 
##   1.800  0.5900 
##   3.100  0.0400 
##   2.800  0.0900 
##   1.000  0.9700 
##   0.500  1.0000 
##  -0.300  1.0000 
##  -2.500  0.2100 
##  -3.200  0.0400 
##  -2.200  0.3800 
##  -2.800  0.0900 
##  -0.700  1.0000 
## 
## Degrees-of-freedom method: kenward-roger 
## P value adjustment: tukey method for comparing a family of 8 estimates
# Screen for influential subjects (Cook's distance, cutoff 4/N with N = 70).
plot(influence(pooling.exp3b.2b, "subj"), which = "cook",
     cutoff = 4/70, sort = TRUE,
     xlab = "Cook´s Distance",
     ylab = "Subject ID")

# no influential observations

# Coefficient table, standardized betas, and CIs for the full model.
pooling.exp3b.2b.table <- gen.m(pooling.exp3b.2b)
pooling.exp3b.2b.betas <- gen.beta(pooling.exp3b.2b)
pooling.exp3b.2b.ci <- gen.ci(pooling.exp3b.2b)[3:10, ]

pooling.exp3b.2b.results <- cbind(
  pooling.exp3b.2b.table,
  pooling.exp3b.2b.betas,
  pooling.exp3b.2b.ci
)

# Cohen's d for the 3-way interaction (7th fixed-effect row),
# computed on the same data the model was fit to.
pooling.exp3b.2b.cohensd <- lme.dscore(
  pooling.exp3b.2b,
  data = exp4.studies23.avg,
  type = "lme4"
) %>%
  slice(7) %>%
  pull(d)
# Test-phase looking times by age in months, coloured by cliff depth,
# with linear fits: full pooled sample (panel A) and older infants
# only (panel B).
ageplot1 <- ggplot(
  data = exp23.1013.avg %>% filter(phase == "testavg"),
  aes(agem, loglook, colour = cliff)
) +
  geom_point() +
  geom_smooth(method = "lm") +
  facet_wrap(~phase)

ageplot2 <- ggplot(
  data = exp23.1013.avg %>% filter(agegroup == "older", phase == "testavg"),
  aes(agem, loglook, colour = cliff)
) +
  geom_point() +
  geom_smooth(method = "lm") +
  facet_wrap(~phase)

# Stack the two panels vertically (patchwork) and tag them A/B.
(ageplot1 / ageplot2) + plot_annotation(tag_levels = 'A')

Methods

Participants

Our final sample included a grand total of 102 10-month-old infants. Studies 1-2 were conducted in the lab, and our final sample included 32 infants in Study 1 (M = 10.13 months, range = 9.6-10.63, 15 female; an additional 6 infants tested and excluded from the final sample), and 30 infants in Study 2 (M = 9.95 months, range = 8.97-10.47, 17 female; an additional 2 infants tested and excluded). In Study 3, we collected an online sample of 40 infants (M = 10.24 months, range = 9.53-11.06, 20 female; an additional xxxx infants tested and excluded [TODO: confirm exclusion count]). In the online sample, infants sat in a high chair (13 out of 40 participants) or their caregivers’ laps (27/40), depending on caregiver preferences, and watched the displays on a tablet (12/40) or a laptop computer (28/40). Caregivers gave high ratings for both the video quality (M = 4.88, SD = 0.33) and audio quality (M = 4.86, SD = 0.34). All three of these studies were pre-registered (Study 1: https://osf.io/uh8ns/; Study 2: https://osf.io/kx928/; Study 3: https://osf.io/48j9v/). Data reliability. As in Experiments 1-3, the reliability of the looking time data in Experiment 4 was high (Study 1: ICC = 0.995, 95% CI [0.991, 0.997]; Study 2: ICC = 0.999, 95% CI [0.998, 0.999]; Study 3: ICC = 0.909, 95% CI [0.859, 0.942]).

Results

Inferring value from danger (Study 1)

When we tested 10-month-old infants using identical protocols as reported in Experiment 1, these younger infants did not show a statistically significant looking preference between the test events (Mhighervalue = NAs, Mlowervalue = NA, pooled SE = NA, [-0.121,0.301], ß = 0.168, B = 0.09, SE = 0.106, p = .202, one-tailed, d = 0.31, removing 1 influential participant). Comparing the data from Experiment 1 and Experiment 4, Study 1, 10- and 13-month-old infants did not significantly differ in their looking preferences in this task, [0.015,0.409], ß = -0.213, B = -0.122, SE = 0.144, p = .398, two-tailed, d = -0.22, no influential participants.

Avoiding danger (Studies 2-3)

When we tested 10-month-old infants in identical protocols as Experiment 2, ten-month-olds looked longer at test when the agent chose the deeper over the shallower trench (Mdeeper = NAs, Mshallower = NA, pooled SE = NA, [-0.472,-0.047], ß = -0.386, B = -0.26, SE = 0.107, p = .011, one-tailed, d = 0.92, removing 1 influential participant). During control events, 10-month-old infants did not show a significant looking preference (Mdeeper = NAs, Mshallower = NA, pooled SE = NA, [-0.174,0.301], ß = 0.109, B = 0.064, SE = 0.119, p = .598, two-tailed, d = -0.2, excluding 1 influential participant). In contrast to the data from older infants, these two patterns of looking preference did not differ from each other, [-0.728,0.073], ß = -0.483, B = -0.327, SE = 0.205, p = .115, two-tailed, d = 0.05, no influential participants.

Notably, we did not replicate the results of Study 2 when we ran an additional online sample of infants: In Study 3, 10-month-old infants did not show a looking preference during the test events (Mdeeper = NAs, Mshallower = NA, pooled SE = NA, [-0.067,0.339], ß = 0.251, B = 0.136, SE = 0.102, p = .0965, one-tailed d = 0.43, excluding 1 influential participant), or the control events (Mdeeper = NAs, Mshallower = NA, pooled SE = NA, [-0.213,0.269], ß = 0.05, B = 0.028, SE = 0.121, p = .818, two-tailed d = -0.13, no influential participants), and their looking preferences did not differ across the two phases of the experiment, [-0.28,0.37], ß = 0.08, B = 0.05, SE = 0.17, p = .78, two-tailed, d = 0.05, no influential observations.

Pooling data across older and younger infants tested in Experiments 2-3, and Experiment 4, Studies 2-3, we found a marginal 3-way interaction between cliff depth (shallow vs deep), phase of experiment (control vs test), and age group (infants younger than 1y vs older than 1y), [-0.481,0.016], ß = 0.348, B = 0.348, SE = 0.179, p = .053, two-tailed, d = 0.2, 1 influential participant.

This interaction appeared to be driven by differences in younger and older infants’ responses to the test events: we found a significant interaction between age group (younger vs older than 1y) and cliff depth (shallow vs deep) for the test events, [0.053,0.453], ß = 0.253, B = 0.253, SE = 0.103, p = .015, two-tailed, d = 0.36, excluding one influential participant, but not the control events, [-0.362,0.119], ß = -0.121, B = -0.121, SE = 0.123, p = .326, two-tailed, d = -0.18, excluding two influential participants.

Thus, in a large and sufficiently powered sample (N = 142), infants younger and infants older than 1 year of age differed in their pattern of looking responses to events where agents choose more vs less dangerous actions, but did not differ when their attention was simply drawn to the physical trenches where these actions occurred.

Supplemental Materials

# is a lognormal transformation justified given the distribution of looks?
# Density of raw looking times by experiment (Figure S1), used to motivate
# the lognormal transformation applied in the main analyses.
fig.S1 <- ggplot(risk.avg, aes(x = look, fill = exp))
fig.S1 +
  geom_density(alpha = 0.5) +
  # geom_text(aes(experiment)) +
  theme_cowplot(20) +
  # facet_wrap(~exp) +
  xlab("Looking Time (s)")

  # scale_fill_brewer(palette = "Set2")

Figure S1. Density plot of looking times during test for Experiment 1 and from test events and control events for Experiments 2-3. Maximum-likelihood fitting revealed that the lognormal distribution (log likelihood = -2456.77) provides a better fit to these data than the normal distribution (log likelihood = -2624.45).

# Reshape the Danger-condition data to long format: one row per subject x
# trial, with the numeric trial index and the trial type ("fam" vs "test")
# parsed out of the trial-column name (e.g. "fam3" -> 3, "fam").
fam <- wide %>%
  filter(cost == "Danger") %>%
  # pivot_longer() supersedes gather() (tidyr >= 1.0)
  pivot_longer(fam1:test4, names_to = "trial", values_to = "look") %>%
  mutate(
    trial_n = parse_number(trial),
    trial_type = str_extract(trial, "[a-z]+")
  ) %>%
  # Coerce plotting variables in one pass instead of column-by-column;
  # `look` goes through character first to defuse any factor encoding.
  mutate(
    trial_type = as.factor(trial_type),
    trial_n = as.factor(trial_n),
    look = as.numeric(as.character(look))
  )

# Boxplots of looking time per trial, split by experiment and trial type
# (Figure S2). Diamonds mark means; error bars are bootstrapped 95% CIs.
famplot <- ggplot(fam, aes(x = trial_n, y = look, fill = trial_type))
famplot +
  geom_boxplot() +
  facet_wrap(~ exp + trial_type, nrow = 2) +
  stat_summary(fun.data = mean_cl_boot, geom = "errorbar", width = 0.1) +
  stat_summary(fun = mean, geom = "point", shape = 5) +
  xlab("Trial N") +
  ylab("Looking time (s)") +
  theme_cowplot(20)

Figure S2. Boxplots of looking times during familiarization and test across Experiments (total N = 206). Error bars represent bootstrapped 95% confidence intervals around the mean.

Including influential observations (reviewer suggestion)

Below, we report the results from our pre-registered analyses including all observations, rather than excluding influential observations.

# Reviewer-requested sensitivity analyses: re-run the pre-registered models
# keeping ALL observations (no influential-observation exclusions).
# Recurring pattern for each experiment/phase:
#   gen.m(<model>)        -> table of model-derived means
#   gen.ci(<model>)[3:4,] -> CI rows for the contrast of interest
#                            (rows 3:6 for the cliff x phase interaction models)
#   lmer() on scale(loglook) refit -> standardized coefficients via gen.beta()
#   cbind(...)            -> single results object used for in-text reporting
# NOTE(review): gen.m/gen.ci/gen.beta and the exp*.N model objects are defined
# earlier in the document.

# Experiment 1, test events (everyone included)
exp1.everyone.test.table <- gen.m(exp1.1)

exp1.everyone.test.ci <- gen.ci(exp1.1)[3:4,]

exp1.everyone.test.beta <- lmer(scale(loglook)~ type + (1 | subj),
                                     data = exp1.avg)

exp1.everyone.test.betas <- gen.beta(exp1.everyone.test.beta)

exp1.everyone.test.results <- cbind(exp1.everyone.test.betas,exp1.everyone.test.table,exp1.everyone.test.ci)
# Experiment 2, test events
exp2.everyone.test.table <- gen.m(exp2.1)

exp2.everyone.test.ci <- gen.ci(exp2.1)[3:4,]

exp2.everyone.test.beta <- lmer(scale(loglook)~ type + (1 | subj),
                                     data = exp2.test)

exp2.everyone.test.betas <- gen.beta(exp2.everyone.test.beta)

exp2.everyone.test.results <- cbind(exp2.everyone.test.betas,exp2.everyone.test.table,exp2.everyone.test.ci)

# Experiment 2, control ("pre") events
exp2.everyone.pre.table <- gen.m(exp2.2)
exp2.everyone.pre.ci <- gen.ci(exp2.2)[3:4,]

exp2.everyone.pre.beta <- lmer(scale(loglook) ~ type + (1 | subj),
                                     data = exp2.control)
exp2.everyone.pre.betas <- gen.beta(exp2.everyone.pre.beta)

exp2.everyone.pre.results <- cbind(exp2.everyone.pre.betas,exp2.everyone.pre.table,exp2.everyone.pre.ci)

# Experiment 2, control-vs-test interaction (cliff x phase)
exp2.everyone.prevstest.table <- gen.m(exp2.3)
exp2.everyone.prevstest.ci <- gen.ci(exp2.3)[3:6,]

exp2.everyone.prevstest.beta <- lmer(scale(loglook) ~ cliff * phase + (1|subj), data = exp2.control.test)

exp2.everyone.prevstest.betas <- gen.beta(exp2.everyone.prevstest.beta)

exp2.everyone.prevstest.results <- cbind(exp2.everyone.prevstest.betas,exp2.everyone.prevstest.table,exp2.everyone.prevstest.ci)
# Experiment 3, test events
exp3.everyone.test.table <- gen.m(exp3.1)
exp3.everyone.test.ci <- gen.ci(exp3.1)[3:4,]

exp3.everyone.test.beta <- lmer(scale(loglook)~ type + (1 | subj),
                                     data = exp3.avg)
exp3.everyone.test.betas <- gen.beta(exp3.everyone.test.beta)

exp3.everyone.test.results <- cbind(exp3.everyone.test.betas,exp3.everyone.test.table,exp3.everyone.test.ci)

# Experiment 3, control ("pre") events
exp3.everyone.pre.table <- gen.m(exp3.2)
exp3.everyone.pre.ci <- gen.ci(exp3.2)[3:4,]

exp3.everyone.pre.beta <- lmer(scale(loglook) ~ type + (1 | subj),
                                     data = exp3.control)
exp3.everyone.pre.betas <- gen.beta(exp3.everyone.pre.beta)

exp3.everyone.pre.results <- cbind(exp3.everyone.pre.betas,exp3.everyone.pre.table,exp3.everyone.pre.ci)

# Experiment 3, control-vs-test interaction (cliff x phase)
exp3.everyone.prevstest.table <- gen.m(exp3.3)
exp3.everyone.prevstest.ci <- gen.ci(exp3.3)[3:6,]

exp3.everyone.prevstest.beta <- lmer(scale(loglook) ~ cliff * phase + (1|subj), data = exp3.control.test)

exp3.everyone.prevstest.betas <- gen.beta(exp3.everyone.prevstest.beta)

exp3.everyone.prevstest.results <- cbind(exp3.everyone.prevstest.betas,exp3.everyone.prevstest.table,exp3.everyone.prevstest.ci)

Experiment 1

Infants looked longer at test when the agent, at test, chose the deeper trench over the shallower trench ([0.02,0.41], ß = 0.35, t(31) = 2.18, p = .037, two-tailed). These findings accord with those reported in the main text and support the interpretation that infants expected the agent to choose the goal for which it was willing to jump deeper trenches.

Experiment 2

Infants looked longer at test when the agent, at test, chose the deeper trench over the shallower trench ([0.07,0.51], ß = 0.43, t(29) = 2.59, p = .0075, one-tailed). During control events, 13-month-old infants preferred to look at the shallow trench ([-0.37,-0.02], ß = -0.34, t(27.3) = -2.2, p = .036, two-tailed). Their looking preferences significantly differed across the two phases of the experiment ([0.11,0.88], ß = 0.75, t(84.74) = 2.52, p = .013, two-tailed). These findings accord with those reported in the main text and support the interpretation that infants expected the agent to take the less dangerous action and therefore showed a greater looking preference for the test event than for the control event presenting events over the deeper trench.

Experiment 3

Infants looked longer at test when the agent chose to jump over the deeper trench ([0.09,0.41], ß = -1.05, t(41) = 3.06, p = .002, one-tailed). During control events, infants did not show a looking preference for either event ([-0.41,0.17], ß = -0.2, t(68) = -0.82, p = .418, two-tailed). Their looking preferences significantly differed across the test and control trials ([0.04,0.7], ß = 0.6, t(108.49) = 2.18, p = .032, two-tailed). This finding fully replicates the two key findings from Experiment 2 and accords with the findings reported in the main text.

Order effects in Experiment 1 (reviewer-requested exploratory analysis)

# Does the test-phase looking preference depend on which familiarization
# sequence (low-to-high vs high-to-low danger) infants saw first?
exp1.order <- lmer(loglook ~ first_fam * type + (1 | subj),
                   data = exp1.avg)
exp1.order.table <- gen.m(exp1.order)
exp1.order.ci <- gen.ci(exp1.order)[3:6, ]

# Same model refit on z-scored looking times for standardized betas.
exp1.order.betas <- lmer(scale(loglook) ~ first_fam * type + (1 | subj),
                         data = exp1.avg)
exp1.order.beta <- gen.beta(exp1.order.betas)

# Combined results object used for in-text reporting below.
exp1.ordereffects <- cbind(exp1.order.table, exp1.order.beta, exp1.order.ci)

Infants’ looking preferences at test did not vary depending on which sequence of events (low to high danger vs high to low danger) they were randomly assigned to watch in the first familiarization trial ([-0.22,0.54], ß = 0.27, t(30) = 0.82, p = .416, two-tailed). All infants saw both trial orders for 3 familiarization trials each.

Looking time to each familiarization event in Experiment 1 (reviewer-requested exploratory analysis)

# Re-attach dplyr last so its verbs mask any conflicting functions loaded
# since the top of the script.
# NOTE(review): detach/re-attach is fragile; consider the `conflicted`
# package or explicit dplyr:: calls instead.
detach("package:dplyr", unload = TRUE)
library(dplyr)
# Event-by-event familiarization coding for a random half of Experiment 1
# (annotated by naive coders; see SOM text).
exp1.fam <- read.csv("./exp1_fam_data/exp1_fam_looks.csv", header = TRUE)

exp1.fam <- exp1.fam %>%
  # "deep_no" -> depth = "deep", yesno = "no"; keep the original clip name
  separate(videoclip, into = c("depth", "yesno"), remove = FALSE) %>%
  rename(subj = subjID) %>%
  # Coerce all design variables to factors in one pass rather than with
  # five separate as.factor() assignments.
  mutate(across(c(depth, yesno, trial, subj, videoclip), as.factor))

# Flag each event iteration: did the infant look away at any point?
# proportion.on == 1 means the infant attended for the entire event.
exp1.fam.glancedoff <- exp1.fam %>%
  mutate(glanced.off = case_when(proportion.on == 1.0 ~ 0,
                                proportion.on < 1.0 ~ 1))

# How many iterations of each clip did each infant see?
exp1.fam.glancedoff.totalclips <- exp1.fam.glancedoff %>%
  select(subj, depth, videoclip, glanced.off) %>%
  group_by(subj, videoclip) %>%
  summarise(totalclips = n())

# On how many of those iterations did the infant glance away?
exp1.fam.glancedoff.freq <- exp1.fam.glancedoff %>%
  select(subj, depth, videoclip, glanced.off) %>%
  group_by(subj, videoclip) %>%
  tally(glanced.off)

# Join counts and compute the per-infant, per-clip glance-away proportion.
# Join keys spelled out explicitly (the two inputs share exactly subj and
# videoclip) instead of relying on full_join()'s implicit natural join.
exp1.fam.glanced.off.summary <- full_join(exp1.fam.glancedoff.totalclips,
                                          exp1.fam.glancedoff.freq,
                                          by = c("subj", "videoclip")) %>%
  mutate(prop.glancedoff = n/totalclips) %>%
  # Recover trench depth from the clip name (summarise() above dropped it)
  mutate(depth = case_when(videoclip == "deep_no" ~ "deep",
                           videoclip == "shallow_yes" ~ "shallow",
                           videoclip == "medium_no" ~ "medium",
                           videoclip == "medium_yes" ~ "medium"))

# Order clips from least to most perilous for plotting.
exp1.fam.glanced.off.summary$videoclip <- factor(
  exp1.fam.glanced.off.summary$videoclip,
  levels = c("shallow_yes", "medium_no", "medium_yes", "deep_no")
)
# Figure S3: per-infant glance-away proportions by event type, with subject
# trajectories (faint lines), means (diamonds), and bootstrapped 95% CIs.
figS3 <- ggplot(exp1.fam.glanced.off.summary,
                aes(x = videoclip, y = prop.glancedoff, fill = depth)) +
  geom_boxplot() +
  geom_point(alpha = 0.3) +
  geom_line(aes(group = subj), alpha = 0.2) +
  # facet_wrap(~depth) +
  stat_summary(fun.data = mean_cl_boot, geom = "errorbar", width = 0.2) +
  stat_summary(fun = mean, geom = "point", shape = 5, size = 3) +
  xlab("Event type") +
  ylab("Proportion of events including look away") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))

figS3

Figure S3. Proportion of events during which infants glanced away from the screen, relative to how many times infants saw each event. Data come from a random subset of infants in Experiment 1 (N = 16 out of 32 total infants), with observations grouped by infant (points connected by grey lines). Error bars represent bootstrapped 95% confidence intervals around the mean. Infants look away from the screen with roughly equal probabilities across the 4 event types.

# compute 4 values per subject, total proportion to
# deep_no, medium_yes, medium_no, shallow_yes
# Per-subject total proportion looking to each of the 4 familiarization
# movie clips (deep_no, medium_yes, medium_no, shallow_yes).
exp1.fam.bymovie <- exp1.fam %>%
  group_by(subj, videoclip, depth, yesno) %>%
  mutate(proportion.on.total = mean(proportion.on)) %>%
  distinct(proportion.on.total)

# One row per subject, one column per clip.
# FIX: the original calls used `id = subj`, relying on partial matching of
# pivot_wider()'s `id_cols` argument — ambiguous (id_cols vs id_expand) and
# an error on tidyr >= 1.2. Spelled out as id_cols here.
exp1.fam.bymovie.wide <- exp1.fam.bymovie %>%
  pivot_wider(names_from = videoclip, values_from = proportion.on.total,
              id_cols = subj)

# Test-phase looking preference per subject: delta.look is looking to the
# lower-value choice minus looking to the higher-value choice.
exp1.avg.diff <- exp1.avg %>%
  filter(phase == "testavg") %>%
  pivot_wider(names_from = type, values_from = look, id_cols = subj) %>%
  mutate(delta.look = lower - higher)

exp1.fam.glancedoff.wide <- exp1.fam.glanced.off.summary %>%
  pivot_wider(names_from = videoclip, values_from = prop.glancedoff,
              id_cols = subj)

# Join familiarization looking with test preferences; keep complete cases.
exp1.famtest <- full_join(exp1.fam.bymovie.wide, exp1.avg.diff,
                          by = c("subj")) %>%
  na.omit()
# Long format for the faceted scatter plot (pivot_longer supersedes gather).
exp1.famtest.long <- exp1.famtest %>%
  pivot_longer(shallow_yes:deep_no, names_to = "movie_clip",
               values_to = "proportion_looking")

exp1.famtest.glanceoff <- full_join(exp1.fam.glancedoff.wide, exp1.avg.diff,
                                    by = c("subj")) %>%
  na.omit()

exp1.famtest.glanceofflong <- exp1.famtest.glanceoff %>%
  pivot_longer(shallow_yes:deep_no, names_to = "movie_clip",
               values_to = "proportion_glanced_off")
theme_set(theme_cowplot(font_size = 15))

# Scatter of familiarization looking vs test preference, one panel per
# movie clip (Figure Sx; not included in the final SOM).
figS4A <-
  ggplot(exp1.famtest.long, aes(x = proportion_looking, y = delta.look)) +
  geom_point() +
  geom_smooth(method = "lm") +
  # geom_line(alpha = 0.2, aes(group = subjID)) +
  facet_wrap(~movie_clip) +
  xlab("Total proportion looking \n to movie clip") +
  ylab("Looking preference at test (s)\n
       <--- Longer looking to expected ---- Longer looking to unexpected --->")

figS4A

Figure Sx [not in final SOM] Scatter plot of average proportion looking to each movie clip from familiarization and looking preferences at test.

# Assemble one column per familiarization clip plus the VOE response
# (delta.look = unexpected - expected looking at test) for Figure S4.
cor.data <- exp1.famtest.glanceoff %>%
  select(shallow_yes:deep_no, delta.look) %>%
  rename(VOE_response = delta.look) %>%
  as.data.frame() 


# NOTE(review): cor.data[, -1] drops the first selected column (apparently
# shallow_yes) from the correlation matrix, yet the Figure S4 caption
# describes all 4 events — confirm the column drop is intentional.
corrplot(cor(cor.data[,-1]),
         method = 'circle',
         type = 'lower',
         addCoef.col  = 'black',
         diag = FALSE)

Figure S4. Correlation plot relating infants’ likelihood of looking away from each of the 4 familiarization events (proportion of events including a look away) to one another, and to infants’ violation of expectation response (unexpected - expected) at test. Values indicate Pearson’s correlations. Descriptively, the more infants looked away from the events, the smaller VOE response they showed at test.

# Table S1 model: does the glance-away proportion differ across the 4
# clips? Random intercept per infant.
fam.glance1 <- lmer(prop.glancedoff ~ videoclip + (1 | subj),
                    data = exp1.fam.glanced.off.summary)
summary(fam.glance1)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: prop.glancedoff ~ videoclip + (1 | subj)
##    Data: exp1.fam.glanced.off.summary
## 
## REML criterion at convergence: 6.2
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.3654 -0.5786  0.0699  0.7238  1.4625 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  subj     (Intercept) 0.0304   0.174   
##  Residual             0.0376   0.194   
## Number of obs: 64, groups:  subj, 16
## 
## Fixed effects:
##                     Estimate Std. Error      df t value Pr(>|t|)    
## (Intercept)           0.5552     0.0652 37.4836    8.51  2.7e-10 ***
## videoclipmedium_no   -0.0208     0.0685 45.0000   -0.30     0.76    
## videoclipmedium_yes  -0.0333     0.0685 45.0000   -0.49     0.63    
## videoclipdeep_no     -0.0958     0.0685 45.0000   -1.40     0.17    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) vdclpmdm_n vdclpmdm_y
## vidclpmdm_n -0.526                      
## vdclpmdm_ys -0.526  0.500               
## videclpdp_n -0.526  0.500      0.500
# Render Table S1 (regression table with test statistics and dfs).
tab_model(fam.glance1, show.stat = TRUE,show.df = TRUE)
  prop glancedoff
Predictors Estimates CI Statistic p df
(Intercept) 0.56 0.42 – 0.69 8.51 <0.001 58.00
videoclip [medium no] -0.02 -0.16 – 0.12 -0.30 0.762 58.00
videoclip [medium yes] -0.03 -0.17 – 0.10 -0.49 0.629 58.00
videoclip [deep no] -0.10 -0.23 – 0.04 -1.40 0.167 58.00
Random Effects
σ2 0.04
τ00 subj 0.03
ICC 0.45
N subj 16
Observations 64
Marginal R2 / Conditional R2 0.019 / 0.458
# Effect display for the Table S1 model.
plot(allEffects(fam.glance1))

# Predict z-scored VOE response from z-scored glance-away proportions for
# each of the 4 familiarization clips.
fam.glance2 <- lm(
  scale(delta.look) ~ scale(shallow_yes) + scale(medium_no) +
    scale(medium_yes) + scale(deep_no),
  data = exp1.famtest.glanceoff
)
tab_model(fam.glance2, show.stat = TRUE, show.df = TRUE)
  scale(delta look)
Predictors Estimates CI Statistic p df
(Intercept) 0.00 -0.51 – 0.51 0.00 1.000 11.00
shallow yes -0.41 -1.15 – 0.33 -1.23 0.245 11.00
medium no -0.17 -0.77 – 0.43 -0.61 0.551 11.00
medium yes 0.21 -0.52 – 0.93 0.63 0.541 11.00
deep no -0.34 -0.94 – 0.27 -1.22 0.249 11.00
Observations 16
R2 / R2 adjusted 0.376 / 0.149
# Full lm summary for the glance-away -> VOE model shown above.
summary(fam.glance2)
## 
## Call:
## lm(formula = scale(delta.look) ~ scale(shallow_yes) + scale(medium_no) + 
##     scale(medium_yes) + scale(deep_no), data = exp1.famtest.glanceoff)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -1.476 -0.452 -0.221  0.586  1.557 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)
## (Intercept)         5.77e-17   2.31e-01    0.00     1.00
## scale(shallow_yes) -4.13e-01   3.37e-01   -1.23     0.25
## scale(medium_no)   -1.68e-01   2.73e-01   -0.61     0.55
## scale(medium_yes)   2.08e-01   3.29e-01    0.63     0.54
## scale(deep_no)     -3.36e-01   2.76e-01   -1.22     0.25
## 
## Residual standard error: 0.92 on 11 degrees of freedom
## Multiple R-squared:  0.376,  Adjusted R-squared:  0.149 
## F-statistic: 1.65 on 4 and 11 DF,  p-value: 0.23
# Effect display for the glance-away -> VOE regression.
plot(allEffects(fam.glance2))

In Experiment 1, infants, on average, looked longer when the agent jumped deeper trenches for one goal than another, and then chose the other goal later at test. One question is how infants used the information in each of the 4 familiarization events, presented in a looping sequence over 6 familiarization trials, in order to draw this inference. Rather than comparing the relative acceptances and refusals of the agent across 3 different levels of peril (shallow, medium, and deep trenches), one alternative hypothesis is that infants selectively attended when the agent accepted and refused the same obstacle (medium trench) for the two goals, and used this ‘go-no-go’ heuristic to infer that the agent prefers the goal it jumped for, over the goal it refused to jump for.

On this hypothesis, infants should be less likely to glance away from events involving medium trenches (vs the other events), and those who looked away less (i.e. attended more) to the medium trench events should have exhibited larger violation-of-expectation effects at test. To test these predictions, naive coders chose a random 50% of videos from Experiment 1 and annotated the onset and offset times of each iteration of each event in each familiarization loop, ignoring interleaved blank screens, and then annotated the onset and offset of infants’ attention to each event iteration. In the plots and following analyses, the events are named shallow_yes and medium_yes when the agent willingly jumped a shallow or medium trench, and medium_no and deep_no when the agent refused to jump a medium or deep trench. For each infant, we calculated the number of each kind of event they saw. Then, we calculated the proportion of those events that infants looked away from. If an infant looked away from the screen for any portion of the event, we marked that event as one where they looked away. Otherwise, we marked that event as one where they looked. For example, if an infant saw 5 deep_no events and glanced away from the screen for 1 of them, this produced a score of 0.2 for that event type, for that infant. We then averaged these proportions within infants across all 4 event types, to produce 4 different proportion glance-away scores per infant. These scores are plotted in Figure S3, are related to each other, and to infants’ looking preferences at test, in Figure S4.

Overall, infants were equally likely to glance away from the screen (vs attend for the entire duration) during the 4 events. See Table S1 for results of the linear mixed effects model (lmer formula: prop.glancedoff ~ videoclip + (1|subj)). Thus, infants did not attend selectively to the events where they had the opportunity to compare the agent’s acceptance and refusal of the medium trench towards the two goals. Instead, they were equally likely to glance away from all 4 types of events.

Table S1. Infants’ probability of glancing away from the 4 video clips from familiarization in Experiment 1

# Does total proportion looking differ across the 4 clips? Mixed model
# with a random intercept per infant.
fam1 <- lmer(proportion.on.total ~ videoclip + (1 | subj),
             data = exp1.fam.bymovie)
summary(fam1)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: proportion.on.total ~ videoclip + (1 | subj)
##    Data: exp1.fam.bymovie
## 
## REML criterion at convergence: -149
## 
## Scaled residuals: 
##    Min     1Q Median     3Q    Max 
## -3.044 -0.645  0.188  0.665  1.344 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  subj     (Intercept) 0.000997 0.0316  
##  Residual             0.003324 0.0577  
## Number of obs: 64, groups:  subj, 16
## 
## Fixed effects:
##                      Estimate Std. Error       df t value Pr(>|t|)    
## (Intercept)           0.92221    0.01643 51.74040   56.12   <2e-16 ***
## videoclipmedium_no   -0.02259    0.02038 45.00000   -1.11     0.27    
## videoclipmedium_yes  -0.01641    0.02038 45.00000   -0.80     0.43    
## videoclipshallow_yes -0.00738    0.02038 45.00000   -0.36     0.72    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) vdclpmdm_n vdclpmdm_y
## vidclpmdm_n -0.620                      
## vdclpmdm_ys -0.620  0.500               
## vdclpshllw_ -0.620  0.500      0.500
# Render the proportion-looking model as a regression table.
tab_model(fam1, show.stat = TRUE, show.df = TRUE)
  proportion on total
Predictors Estimates CI Statistic p df
(Intercept) 0.92 0.89 – 0.96 56.12 <0.001 58.00
videoclip [medium no] -0.02 -0.06 – 0.02 -1.11 0.272 58.00
videoclip [medium yes] -0.02 -0.06 – 0.02 -0.80 0.424 58.00
videoclip [shallow yes] -0.01 -0.05 – 0.03 -0.36 0.719 58.00
Random Effects
σ2 0.00
τ00 subj 0.00
ICC 0.23
N subj 16
Observations 64
Marginal R2 / Conditional R2 0.017 / 0.244
# Effect display for the proportion-looking model.
plot(allEffects(fam1))

We then tested the second prediction: that infants who glanced away from the medium trench events (i.e. those who missed the critical information for a go-no-go strategy) would also show smaller violation-of-expectation responses at test. To do this, we calculated infants’ looking preference at test (average duration looking when the agent chose the less valued goal, minus average duration looking when the agent chose the more valued goal), and asked whether variability in infants’ looking behavior towards each of the 4 events predicted variability in these looking preferences. We found that infants’ tendency to glance away from the events involving medium trenches, or towards any of the 4 events, did not predict the magnitude of their violation-of-expectation response. See Table S2 for full results (lm formula: delta.look ~ shallow_yes + medium_no + medium_yes + deep_no).

Together, these findings suggest that infants did not selectively attend to the videos with the same trench depth during familiarization in Experiment 1 (or selectively glance away from the other events), and that their looking towards these videos did not predict stronger inferences about which goal was more valuable. Therefore, it appears unlikely that infants as a group used a “go-no-go” heuristic on the agent’s actions over the medium trenches in order to infer which the agent preferred. To be clear, we are not suggesting that infants could never use such a strategy. Instead we are suggesting that this strategy does not appear to explain the results of Experiment 1 (based on this analysis), or the results of Experiments 2-3 (in principle, based on the experimental design, in which the agent always accepts and never refuses jumping actions).

Table S2. Infants’ violation of expectation responses at test, as predicted by their tendency to glance away from the 4 video clips from familiarization in Experiment 1. Dependent and independent variables were z-scored prior to entry into the model.

# Table S2 model: z-scored test preference predicted by z-scored
# per-clip scores from familiarization.
# NOTE(review): exp1.famtest carries total proportion-looking scores
# (from exp1.fam.bymovie.wide), while the Table S2 caption describes
# glance-away tendencies — confirm which data source is intended.
fam2 <- lm(
  scale(delta.look) ~ scale(shallow_yes) + scale(medium_no) +
    scale(medium_yes) + scale(deep_no),
  data = exp1.famtest
)
tab_model(fam2, show.stat = TRUE,show.df = TRUE)
  scale(delta look)
Predictors Estimates CI Statistic p df
(Intercept) 0.00 -0.49 – 0.49 0.00 1.000 11.00
shallow yes -0.01 -0.57 – 0.54 -0.05 0.962 11.00
medium no 0.12 -0.43 – 0.68 0.49 0.634 11.00
medium yes 0.33 -0.31 – 0.97 1.12 0.285 11.00
deep no 0.38 -0.19 – 0.95 1.46 0.171 11.00
Observations 16
R2 / R2 adjusted 0.422 / 0.211
# Full lm summary corresponding to Table S2.
summary(fam2)
## 
## Call:
## lm(formula = scale(delta.look) ~ scale(shallow_yes) + scale(medium_no) + 
##     scale(medium_yes) + scale(deep_no), data = exp1.famtest)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.8616 -0.2963  0.0412  0.3524  1.1196 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)
## (Intercept)         3.51e-16   2.22e-01    0.00     1.00
## scale(shallow_yes) -1.22e-02   2.52e-01   -0.05     0.96
## scale(medium_no)    1.23e-01   2.52e-01    0.49     0.63
## scale(medium_yes)   3.27e-01   2.91e-01    1.12     0.29
## scale(deep_no)      3.78e-01   2.58e-01    1.46     0.17
## 
## Residual standard error: 0.89 on 11 degrees of freedom
## Multiple R-squared:  0.422,  Adjusted R-squared:  0.211 
## F-statistic: 2.01 on 4 and 11 DF,  p-value: 0.163
plot(allEffects(fam2))

# sim <- powerCurve(extend(exp1.1, along = "subj", n = 500),
#                        along = "subj", breaks = c(36, 40, 44, 48, 52, 56, 60, 64, 68), alpha = .05, seed = 123)
# plot(sim)
# print(sim)
# reliability <- wide %>% filter(reliability == 1) %>%
#   select(subj, sex, experiment, test1, test2, test3, test4) %>%
#   gather(trial, look, test1:test4) %>%
#   mutate(trialn = str_remove(trial, "test")) %>%
#   group_by(subj, trialn)
# write.csv(reliability, "risk_rel.csv")